diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2547a1e0b8f9ea8fab614fbf9c0be4ad0f34a0e9..208d7b94aec6e0ceb101807cfa26556cc576b5bd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5278,27 +5278,45 @@ spia_peddr= split_lock_detect= - [X86] Enable split lock detection + [X86] Enable split lock detection or bus lock detection When enabled (and if hardware support is present), atomic instructions that access data across cache line - boundaries will result in an alignment check exception. + boundaries will result in an alignment check exception + for split lock detection or a debug exception for + bus lock detection. off - not enabled - warn - the kernel will emit rate limited warnings + warn - the kernel will emit rate-limited warnings about applications triggering the #AC - exception. This mode is the default on CPUs - that supports split lock detection. + exception or the #DB exception. This mode is + the default on CPUs that support split lock + detection or bus lock detection. Default + behavior is by #AC if both features are + enabled in hardware. fatal - the kernel will send SIGBUS to applications - that trigger the #AC exception. + that trigger the #AC exception or the #DB + exception. Default behavior is by #AC if + both features are enabled in hardware. + + ratelimit:N - + Set system wide rate limit to N bus locks + per second for bus lock detection. + 0 < N <= 1000. + + N/A for split lock detection. + If an #AC exception is hit in the kernel or in firmware (i.e. not while executing in user mode) the kernel will oops in either "warn" or "fatal" mode. + #DB exception for bus lock is triggered only when + CPL > 0. + srbds= [X86,INTEL] Control the Special Register Buffer Data Sampling (SRBDS) mitigation. diff --git a/Documentation/x86/buslock.rst b/Documentation/x86/buslock.rst new file mode 100644 index 0000000000000000000000000000000000000000..159ff6ba830ee94851b33c3e658c04f1b4487df8 --- /dev/null +++ b/Documentation/x86/buslock.rst @@ -0,0 +1,104 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: + +=============================== +Bus lock detection and handling +=============================== + +:Copyright: |copy| 2021 Intel Corporation +:Authors: - Fenghua Yu + - Tony Luck + +Problem +======= + +A split lock is any atomic operation whose operand crosses two cache lines. +Since the operand spans two cache lines and the operation must be atomic, +the system locks the bus while the CPU accesses the two cache lines. + +A bus lock is acquired through either split locked access to writeback (WB) +memory or any locked access to non-WB memory. This is typically thousands of +cycles slower than an atomic operation within a cache line. It also disrupts +performance on other cores and brings the whole system to its knees. + +Detection +========= + +Intel processors may support either or both of the following hardware +mechanisms to detect split locks and bus locks. + +#AC exception for split lock detection +-------------------------------------- + +Beginning with the Tremont Atom CPU split lock operations may raise an +Alignment Check (#AC) exception when a split lock operation is attemped. + +#DB exception for bus lock detection +------------------------------------ + +Some CPUs have the ability to notify the kernel by an #DB trap after a user +instruction acquires a bus lock and is executed. This allows the kernel to +terminate the application or to enforce throttling. + +Software handling +================= + +The kernel #AC and #DB handlers handle bus lock based on the kernel +parameter "split_lock_detect". Here is a summary of different options: + ++------------------+----------------------------+-----------------------+ +|split_lock_detect=|#AC for split lock |#DB for bus lock | ++------------------+----------------------------+-----------------------+ +|off |Do nothing |Do nothing | ++------------------+----------------------------+-----------------------+ +|warn |Kernel OOPs |Warn once per task and | +|(default) |Warn once per task and |and continues to run. | +| |disable future checking | | +| |When both features are | | +| |supported, warn in #AC | | ++------------------+----------------------------+-----------------------+ +|fatal |Kernel OOPs |Send SIGBUS to user. | +| |Send SIGBUS to user | | +| |When both features are | | +| |supported, fatal in #AC | | ++------------------+----------------------------+-----------------------+ + +Usages +====== + +Detecting and handling bus lock may find usages in various areas: + +It is critical for real time system designers who build consolidated real +time systems. These systems run hard real time code on some cores and run +"untrusted" user processes on other cores. The hard real time cannot afford +to have any bus lock from the untrusted processes to hurt real time +performance. To date the designers have been unable to deploy these +solutions as they have no way to prevent the "untrusted" user code from +generating split lock and bus lock to block the hard real time code to +access memory during bus locking. + +It's also useful for general computing to prevent guests or user +applications from slowing down the overall system by executing instructions +with bus lock. + + +Guidance +======== +off +--- + +Disable checking for split lock and bus lock. This option can be useful if +there are legacy applications that trigger these events at a low rate so +that mitigation is not needed. + +warn +---- + +A warning is emitted when a bus lock is detected which allows to identify +the offending application. This is the default behavior. + +fatal +----- + +In this case, the bus lock is not tolerated and the process is killed. diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index cac3ee1038b988d43f18e8437950a435cec08043..b345cdbf5ad31f84f7019a88879f50c0efd40652 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -30,6 +30,7 @@ x86-specific Documentation microcode resctrl_ui tsx_async_abort + buslock usb-legacy-support i386/index x86_64/index diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index da78ccbd493b77e30c0b56a2fa8c7267835ffdfb..0d7fc0e2bfc9e6403f87436faa421bfdfec97e0a 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -41,12 +41,13 @@ unsigned int x86_family(unsigned int sig); unsigned int x86_model(unsigned int sig); unsigned int x86_stepping(unsigned int sig); #ifdef CONFIG_CPU_SUP_INTEL -extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c); +extern void __init sld_setup(struct cpuinfo_x86 *c); extern void switch_to_sld(unsigned long tifn); extern bool handle_user_split_lock(struct pt_regs *regs, long error_code); extern bool handle_guest_split_lock(unsigned long ip); +extern void handle_bus_lock(struct pt_regs *regs); #else -static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {} +static inline void __init sld_setup(struct cpuinfo_x86 *c) {} static inline void switch_to_sld(unsigned long tifn) {} static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code) { @@ -57,6 +58,8 @@ static inline bool handle_guest_split_lock(unsigned long ip) { return false; } + +static inline void handle_bus_lock(struct pt_regs *regs) {} #endif #ifdef CONFIG_IA32_FEAT_CTL void init_ia32_feat_ctl(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d79507141ce13ba77c3d72968bd8184a132c1a2d..8e93fde24a7a8a19c0def361108a2fc2a1dee304 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -358,6 +358,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b93228bb1ca4f1d646f6022cdcd93958718b33c2..c1be8eaf43049184b2026d9cdcf33941a1fb2180 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -289,6 +289,7 @@ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h index d95d080b30e3ea833a5ddbe47f95e1195c6870a1..0007ba077c0c2beac746d7d51e9e00809cfb4fed 100644 --- a/arch/x86/include/uapi/asm/debugreg.h +++ b/arch/x86/include/uapi/asm/debugreg.h @@ -24,6 +24,7 @@ #define DR_TRAP3 (0x8) /* db3 */ #define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) +#define DR_BUS_LOCK (0x800) /* bus_lock */ #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e0eada23554755a5afdbd9aa4f48b961bd1b6b87..cc463b00875249b3d86478b5c84929531795a0b0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1370,7 +1370,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_set_bug_bits(c); - cpu_set_core_cap_bits(c); + sld_setup(c); fpu__init_system(c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0e422a5448351c6d418a3bc40f40b39e5d7c2e1d..37f3c38d645f875ec7f7769504c5b3b37aa98da9 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -41,12 +42,13 @@ enum split_lock_detect_state { sld_off = 0, sld_warn, sld_fatal, + sld_ratelimit, }; /* - * Default to sld_off because most systems do not support split lock detection - * split_lock_setup() will switch this to sld_warn on systems that support - * split lock detect, unless there is a command line override. + * Default to sld_off because most systems do not support split lock detection. + * sld_state_setup() will switch this to sld_warn on systems that support + * split lock/bus lock detect, unless there is a command line override. */ static enum split_lock_detect_state sld_state __ro_after_init = sld_off; static u64 msr_test_ctrl_cache __ro_after_init; @@ -603,6 +605,7 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c) } static void split_lock_init(void); +static void bus_lock_init(void); static void init_intel(struct cpuinfo_x86 *c) { @@ -720,6 +723,7 @@ static void init_intel(struct cpuinfo_x86 *c) tsx_disable(); split_lock_init(); + bus_lock_init(); intel_init_thermal(c); } @@ -995,13 +999,30 @@ static const struct { { "off", sld_off }, { "warn", sld_warn }, { "fatal", sld_fatal }, + { "ratelimit:", sld_ratelimit }, }; +static struct ratelimit_state bld_ratelimit; + static inline bool match_option(const char *arg, int arglen, const char *opt) { - int len = strlen(opt); + int len = strlen(opt), ratelimit; + + if (strncmp(arg, opt, len)) + return false; - return len == arglen && !strncmp(arg, opt, len); + /* + * Min ratelimit is 1 bus lock/sec. + * Max ratelimit is 1000 bus locks/sec. + */ + if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 && + ratelimit > 0 && ratelimit <= 1000) { + ratelimit_state_init(&bld_ratelimit, HZ, ratelimit); + ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE); + return true; + } + + return len == arglen; } static bool split_lock_verify_msr(bool on) @@ -1020,16 +1041,15 @@ static bool split_lock_verify_msr(bool on) return ctrl == tmp; } -static void __init split_lock_setup(void) +static void __init sld_state_setup(void) { enum split_lock_detect_state state = sld_warn; char arg[20]; int i, ret; - if (!split_lock_verify_msr(false)) { - pr_info("MSR access failed: Disabled\n"); + if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) return; - } ret = cmdline_find_option(boot_command_line, "split_lock_detect", arg, sizeof(arg)); @@ -1041,17 +1061,14 @@ static void __init split_lock_setup(void) } } } + sld_state = state; +} - switch (state) { - case sld_off: - pr_info("disabled\n"); +static void __init __split_lock_setup(void) +{ + if (!split_lock_verify_msr(false)) { + pr_info("MSR access failed: Disabled\n"); return; - case sld_warn: - pr_info("warning about user-space split_locks\n"); - break; - case sld_fatal: - pr_info("sending SIGBUS on user-space split_locks\n"); - break; } rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); @@ -1061,7 +1078,9 @@ static void __init split_lock_setup(void) return; } - sld_state = state; + /* Restore the MSR to its cached value. */ + wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); + setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT); } @@ -1082,6 +1101,15 @@ static void sld_update_msr(bool on) static void split_lock_init(void) { + /* + * #DB for bus lock handles ratelimit and #AC for split lock is + * disabled. + */ + if (sld_state == sld_ratelimit) { + split_lock_verify_msr(false); + return; + } + if (cpu_model_supports_sld) split_lock_verify_msr(sld_state != sld_off); } @@ -1118,6 +1146,29 @@ bool handle_guest_split_lock(unsigned long ip) } EXPORT_SYMBOL_GPL(handle_guest_split_lock); +static void bus_lock_init(void) +{ + u64 val; + + /* + * Warn and fatal are handled by #AC for split lock if #AC for + * split lock is supported. + */ + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) || + (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + (sld_state == sld_warn || sld_state == sld_fatal)) || + sld_state == sld_off) + return; + + /* + * Enable #DB for bus lock. All bus locks are handled in #DB except + * split locks are handled in #AC in the fatal case. + */ + rdmsrl(MSR_IA32_DEBUGCTLMSR, val); + val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + wrmsrl(MSR_IA32_DEBUGCTLMSR, val); +} + bool handle_user_split_lock(struct pt_regs *regs, long error_code) { if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) @@ -1126,6 +1177,27 @@ bool handle_user_split_lock(struct pt_regs *regs, long error_code) return true; } +void handle_bus_lock(struct pt_regs *regs) +{ + switch (sld_state) { + case sld_off: + break; + case sld_ratelimit: + /* Enforce no more than bld_ratelimit bus locks/sec. */ + while (!__ratelimit(&bld_ratelimit)) + msleep(20); + /* Warn on the bus lock. */ + fallthrough; + case sld_warn: + pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n", + current->comm, current->pid, regs->ip); + break; + case sld_fatal: + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); + break; + } +} + /* * This function is called only when switching between tasks with * different split-lock detection modes. It sets the MSR for the @@ -1166,7 +1238,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { {} }; -void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) +static void __init split_lock_setup(struct cpuinfo_x86 *c) { const struct x86_cpu_id *m; u64 ia32_core_caps; @@ -1193,5 +1265,44 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) } cpu_model_supports_sld = true; - split_lock_setup(); + __split_lock_setup(); +} + +static void sld_state_show(void) +{ + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && + !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + return; + + switch (sld_state) { + case sld_off: + pr_info("disabled\n"); + break; + case sld_warn: + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) + pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); + else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + pr_info("#DB: warning on user-space bus_locks\n"); + break; + case sld_fatal: + if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { + pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n"); + } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { + pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n", + boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ? + " from non-WB" : ""); + } + break; + case sld_ratelimit: + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst); + break; + } +} + +void __init sld_setup(struct cpuinfo_x86 *c) +{ + split_lock_setup(c); + sld_state_setup(); + sld_state_show(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d9132c9966011a7e2a7ab9e0e1c92fb53350ce13..a47529436a6d72a6214e239fed043c9ae81fd8b8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -1023,6 +1023,10 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, goto out_irq; } + /* #DB for bus lock can only be triggered from userspace. */ + if (dr6 & DR_BUS_LOCK) + handle_bus_lock(regs); + /* Add the virtual_dr6 bits for signals. */ dr6 |= current->thread.virtual_dr6; if (dr6 & (DR_STEP | DR_TRAP_BITS) || icebp)