diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 17811002a8f7cdf54fb065e2b65a60472f2f6d3e..392b4ac81af92d890e218b0e004c6646644cb68c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3428,6 +3428,8 @@ return indicates the attribute is implemented. It does not necessarily indicate that the attribute can be read or written in the device's current state. "addr" is ignored. +.. _KVM_ARM_VCPU_INIT: + 4.82 KVM_ARM_VCPU_INIT ---------------------- @@ -6128,6 +6130,56 @@ writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG interface. No error will be returned, but the resulting offset will not be applied. +.. _KVM_ARM_GET_REG_WRITABLE_MASKS: + +4.139 KVM_ARM_GET_REG_WRITABLE_MASKS +------------------------------------------- + +:Capability: KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES +:Architectures: arm64 +:Type: vm ioctl +:Parameters: struct reg_mask_range (in/out) +:Returns: 0 on success, < 0 on error + + +:: + + #define KVM_ARM_FEATURE_ID_RANGE 0 + #define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) + + struct reg_mask_range { + __u64 addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; + }; + +This ioctl copies the writable masks for a selected range of registers to +userspace. + +The ``addr`` field is a pointer to the destination array where KVM copies +the writable masks. + +The ``range`` field indicates the requested range of registers. +``KVM_CHECK_EXTENSION`` for the ``KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES`` +capability returns the supported ranges, expressed as a set of flags. Each +flag's bit index represents a possible value for the ``range`` field. +All other values are reserved for future use and KVM may return an error. + +The ``reserved[13]`` array is reserved for future use and should be 0, or +KVM may return an error. + +KVM_ARM_FEATURE_ID_RANGE (0) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Feature ID range is defined as the AArch64 System register space with +op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, op2=={0-7}. + +The mask returned array pointed to by ``addr`` is indexed by the macro +``ARM64_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2)``, allowing userspace +to know what fields can be changed for the system register described by +``op0, op1, crn, crm, op2``. KVM rejects ID register values that describe a +superset of the features supported by the system. + 5. The kvm_run structure ======================== diff --git a/Documentation/virt/kvm/arm/index.rst b/Documentation/virt/kvm/arm/index.rst index e84848432158084f789b0c18764afd1f3fb547ac..7f231c724e16756f63b2a38e9e2df6a60727d2ee 100644 --- a/Documentation/virt/kvm/arm/index.rst +++ b/Documentation/virt/kvm/arm/index.rst @@ -11,3 +11,4 @@ ARM hypercalls pvtime ptp_kvm + vcpu-features diff --git a/Documentation/virt/kvm/arm/vcpu-features.rst b/Documentation/virt/kvm/arm/vcpu-features.rst new file mode 100644 index 0000000000000000000000000000000000000000..f7cc6d8d8b74052616acbf0dab3b92ec13badf22 --- /dev/null +++ b/Documentation/virt/kvm/arm/vcpu-features.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +vCPU feature selection on arm64 +=============================== + +KVM/arm64 provides two mechanisms that allow userspace to configure +the CPU features presented to the guest. + +KVM_ARM_VCPU_INIT +================= + +The ``KVM_ARM_VCPU_INIT`` ioctl accepts a bitmap of feature flags +(``struct kvm_vcpu_init::features``). Features enabled by this interface are +*opt-in* and may change/extend UAPI. See :ref:`KVM_ARM_VCPU_INIT` for complete +documentation of the features controlled by the ioctl. + +Otherwise, all CPU features supported by KVM are described by the architected +ID registers. + +The ID Registers +================ + +The Arm architecture specifies a range of *ID Registers* that describe the set +of architectural features supported by the CPU implementation. KVM initializes +the guest's ID registers to the maximum set of CPU features supported by the +system. The ID register values may be VM-scoped in KVM, meaning that the +values could be shared for all vCPUs in a VM. + +KVM allows userspace to *opt-out* of certain CPU features described by the ID +registers by writing values to them via the ``KVM_SET_ONE_REG`` ioctl. The ID +registers are mutable until the VM has started, i.e. userspace has called +``KVM_RUN`` on at least one vCPU in the VM. Userspace can discover what fields +are mutable in the ID registers using the ``KVM_ARM_GET_REG_WRITABLE_MASKS``. +See the :ref:`ioctl documentation ` for more +details. + +Userspace is allowed to *limit* or *mask* CPU features according to the rules +outlined by the architecture in DDI0487J.a D19.1.3 'Principles of the ID +scheme for fields in ID register'. KVM does not allow ID register values that +exceed the capabilities of the system. + +.. warning:: + It is **strongly recommended** that userspace modify the ID register values + before accessing the rest of the vCPU's CPU register state. KVM may use the + ID register values to control feature emulation. Interleaving ID register + modification with other system register accesses may lead to unpredictable + behavior. diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index d68307a50d13ec2c98e162e6809bd4dd670f1bec..683ebd5b7e962ccafcb7d8050e3b22322a933a76 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -651,6 +651,20 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } +static inline bool supports_cnp(int scope) +{ + u64 mmfr2; + u8 cnp; + + if (scope == SCOPE_LOCAL_CPU) + mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1); + else + mmfr2 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); + + cnp = cpuid_feature_extract_unsigned_field(mmfr2, + ID_AA64MMFR2_EL1_CnP_SHIFT); + return cnp == 1; +} static inline bool supports_csv2p3(int scope) { diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index f0b10cb2c87dd1a9cd67cc3e641d6a6324a8e19c..79c4a62de5c46e3622d650fdadb611375296f25c 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -91,7 +91,8 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) } if (cpus_have_final_cap(ARM64_HAS_EVT) && - !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE)) + !cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE) && + kvm_read_vm_id_reg(vcpu->kvm, SYS_CTR_EL0) == read_sanitised_ftr_reg(SYS_CTR_EL0)) vcpu->arch.hcr_el2 |= HCR_TID4; else vcpu->arch.hcr_el2 |= HCR_TID2; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ac4e59256f8ed5c6294208bb6f80cfc2293b2db4..9ffdea9d59247fefbe19630c4b507ce608f210d2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -298,6 +298,10 @@ struct kvm_arch { }; bool is_virtcca_cvm; #endif + +#ifndef __GENKSYMS__ + u64 ctr_el0; +#endif }; struct kvm_vcpu_fault_info { @@ -1202,6 +1206,8 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, struct kvm_arm_copy_mte_tags *copy_tags); int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm, struct kvm_arm_counter_offset *offset); +int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, + struct reg_mask_range *range); /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); @@ -1283,6 +1289,24 @@ extern unsigned int twedel; void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu); bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu); +static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg) +{ + switch (reg) { + case sys_reg(3, 0, 0, 1, 0) ... sys_reg(3, 0, 0, 7, 7): + return &ka->id_regs[IDREG_IDX(reg)]; + case SYS_CTR_EL0: + return &ka->ctr_el0; + default: + WARN_ON_ONCE(1); + return NULL; + } +} + +#define kvm_read_vm_id_reg(kvm, reg) \ + ({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; }) + +void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); + extern bool force_wfi_trap; extern bool kvm_ncsnp_support; extern bool kvm_dvmbm_support; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 97941e582d83d0d0189d6a482a588290ac54df69..03b0278568187ea457aa08670d269593cefd48e2 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -509,6 +509,38 @@ struct kvm_smccc_filter { #define KVM_HYPERCALL_EXIT_SMC (1U << 0) #define KVM_HYPERCALL_EXIT_16BIT (1U << 1) +/* + * Get feature ID registers userspace writable mask. + * + * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model + * Feature Register 2"): + * + * "The Feature ID space is defined as the System register space in + * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, + * op2=={0-7}." + * + * This covers all currently known R/O registers that indicate + * anything useful feature wise, including the ID registers. + * + * If we ever need to introduce a new range, it will be described as + * such in the range field. + */ +#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \ + ({ \ + __u64 __op1 = (op1) & 3; \ + __op1 -= (__op1 == 3); \ + (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \ + }) + +#define KVM_ARM_FEATURE_ID_RANGE 0 +#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) + +struct reg_mask_range { + __u64 addr; /* Pointer to mask array */ + __u32 range; /* Requested range */ + __u32 reserved[13]; +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2e5e4052a182e5ff30fb639de87c715c57467ddf..5dc7c36e5820caa3fb3a1bcaaab5d60bae8a9128 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -60,6 +60,15 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) return model == entry->midr_range.model; } +#ifdef CONFIG_HISILICON_ERRATUM_162100125 +static bool +hisilicon_162100125_match(const struct arm64_cpu_capabilities *entry, + int scope) +{ + return is_affected_midr_range_list(entry, scope) && supports_cnp(scope); +} +#endif + #ifdef CONFIG_HISILICON_ERRATUM_1980005 static bool hisilicon_1980005_match(const struct arm64_cpu_capabilities *entry, @@ -639,7 +648,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Hisilicon erratum 162100125", .capability = ARM64_WORKAROUND_HISILICON_ERRATUM_162100125, - ERRATA_MIDR_RANGE_LIST(hisilicon_erratum_162100125_cpus), + .matches = hisilicon_162100125_match, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .midr_range_list = hisilicon_erratum_162100125_cpus, }, #endif #ifdef CONFIG_HISILICON_ERRATUM_1980005 diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 4bca7ad7b5e336e5ea3d25411629e509a5d5be29..6b62a9776850447e0b8e98cfe2b685f9189037ca 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -865,6 +865,7 @@ u8 spectre_bhb_loop_affected(int scope) static const struct midr_range spectre_bhb_k24_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_HISI_LINXICORE9100), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), {}, }; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c0126a9203ae7641257fa5d3b8a443d8abd2f0c4..48cb623f8549f6274126c4f188595f7bd52dea1c 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -436,6 +436,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = static_key_enabled(&virtcca_cvm_is_available); break; #endif + case KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES: + r = BIT(0); + break; default: r = 0; } @@ -1897,6 +1900,13 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return 0; } #endif + case KVM_ARM_GET_REG_WRITABLE_MASKS: { + struct reg_mask_range range; + + if (copy_from_user(&range, argp, sizeof(range))) + return -EFAULT; + return kvm_vm_ioctl_get_reg_writable_masks(kvm, &range); + } default: return -EINVAL; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 6469a7c51df3fdc74f0cee74cc3d2ef4ea8b7fb8..ea715ab0d5a6befad2049a73ed09a7ce2d0aaf9c 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -55,7 +55,7 @@ static u32 __kvm_pmu_event_mask(unsigned int pmuver) static u32 kvm_pmu_event_mask(struct kvm *kvm) { - u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1); + u64 dfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1); u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0); return __kvm_pmu_event_mask(pmuver); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0cfa8d93e2ee36ecb14b1db62db3b25a3c8db299..cb31964c95c4978c6093188c26d413c2e9deffdc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -385,7 +385,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + u64 val = IDREG(vcpu->kvm, SYS_ID_AA64MMFR1_EL1); u32 sr = reg_to_encoding(r); if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) { @@ -1247,8 +1247,14 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, /* Some features have different safe value type in KVM than host features */ switch (id) { case SYS_ID_AA64DFR0_EL1: - if (kvm_ftr.shift == ID_AA64DFR0_EL1_PMUVer_SHIFT) + switch (kvm_ftr.shift) { + case ID_AA64DFR0_EL1_PMUVer_SHIFT: kvm_ftr.type = FTR_LOWER_SAFE; + break; + case ID_AA64DFR0_EL1_DebugVer_SHIFT: + kvm_ftr.type = FTR_LOWER_SAFE; + break; + } break; case SYS_ID_DFR0_EL1: if (kvm_ftr.shift == ID_DFR0_EL1_PerfMon_SHIFT) @@ -1357,6 +1363,12 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu, val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI), vcpu->kvm->arch.pfr1_nmi); + /* + * MPAM is disabled by default as KVM also needs a set of PARTID to + * program the MPAMVPMx_EL2 PARTID remapping registers with. Hide, + * MPAM_frac as well from Guest to handle FEAT_MPAMv0p1. + */ + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -1392,7 +1404,7 @@ static u64 kvm_read_sanitised_id_reg(struct kvm_vcpu *vcpu, static u64 read_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - return IDREG(vcpu->kvm, reg_to_encoding(r)); + return kvm_read_vm_id_reg(vcpu->kvm, reg_to_encoding(r)); } /* @@ -1401,11 +1413,21 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r */ static inline bool is_id_reg(u32 id) { + if (id == SYS_CTR_EL0) + return true; + return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 && sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 && sys_reg_CRm(id) < 8); } +static inline bool is_aa32_id_reg(u32 id) +{ + return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 && + sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 && + sys_reg_CRm(id) <= 3); +} + static unsigned int id_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { @@ -1499,17 +1521,31 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, val &= ~ID_AA64PFR0_EL1_AMU_MASK; + /* + * MPAM is disabled by default as KVM also needs a set of PARTID to + * program the MPAMVPMx_EL2 PARTID remapping registers with. But some + * older kernels let the guest see the ID bit. + */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + return val; } +#define ID_REG_LIMIT_FIELD_ENUM(val, reg, field, limit) \ +({ \ + u64 __f_val = FIELD_GET(reg##_##field##_MASK, val); \ + (val) &= ~reg##_##field##_MASK; \ + (val) |= FIELD_PREP(reg##_##field##_MASK, \ + min(__f_val, (u64)reg##_##field##_##limit)); \ + (val); \ +}) + static u64 read_sanitised_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { u64 val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); - /* Limit debug to ARMv8.0 */ - val &= ~ID_AA64DFR0_EL1_DebugVer_MASK; - val |= SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DebugVer, IMP); + val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8); /* * Only initialize the PMU version if the vCPU was configured with one. @@ -1532,6 +1568,7 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) { + u8 debugver = SYS_FIELD_GET(ID_AA64DFR0_EL1, DebugVer, val); u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val); /* @@ -1551,9 +1588,39 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) val &= ~ID_AA64DFR0_EL1_PMUVer_MASK; + /* + * ID_AA64DFR0_EL1.DebugVer is one of those awkward fields with a + * nonzero minimum safe value. + */ + if (debugver < ID_AA64DFR0_EL1_DebugVer_IMP) + return -EINVAL; + return set_id_reg(vcpu, rd, val); } +static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, u64 user_val) +{ + u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + u64 mpam_mask = ID_AA64PFR0_EL1_MPAM_MASK; + + /* + * Commit 011e5f5bf529f ("arm64/cpufeature: Add remaining feature bits + * in ID_AA64PFR0 register") exposed the MPAM field of AA64PFR0_EL1 to + * guests, but didn't add trap handling. KVM doesn't support MPAM and + * always returns an UNDEF for these registers. The guest must see 0 + * for this field. + * + * But KVM must also accept values from user-space that were provided + * by KVM. On CPUs that support MPAM, permit user-space to write + * the santisied value to ID_AA64PFR0_EL1.MPAM, but ignore this field. + */ + if ((hw_val & mpam_mask) == (user_val & mpam_mask)) + user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + + return set_id_reg(vcpu, rd, user_val); +} + static u64 read_sanitised_id_dfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { @@ -1564,6 +1631,8 @@ static u64 read_sanitised_id_dfr0_el1(struct kvm_vcpu *vcpu, if (kvm_vcpu_has_pmu(vcpu)) val |= SYS_FIELD_PREP(ID_DFR0_EL1, PerfMon, perfmon); + val = ID_REG_LIMIT_FIELD_ENUM(val, ID_DFR0_EL1, CopDbg, Debugv8p8); + return val; } @@ -1572,6 +1641,7 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, u64 val) { u8 perfmon = SYS_FIELD_GET(ID_DFR0_EL1, PerfMon, val); + u8 copdbg = SYS_FIELD_GET(ID_DFR0_EL1, CopDbg, val); if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF) { val &= ~ID_DFR0_EL1_PerfMon_MASK; @@ -1587,6 +1657,9 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, if (perfmon != 0 && perfmon < ID_DFR0_EL1_PerfMon_PMUv3) return -EINVAL; + if (copdbg < ID_DFR0_EL1_CopDbg_Armv8) + return -EINVAL; + return set_id_reg(vcpu, rd, val); } @@ -1594,6 +1667,8 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) { + u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + u64 mpamfrac_mask = ID_AA64PFR1_EL1_MPAM_frac_MASK; u8 nmi; nmi = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR1_EL1_NMI_SHIFT); @@ -1601,15 +1676,23 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu, (nmi && (!cpus_have_const_cap(ARM64_HAS_NMI) || static_branch_unlikely(&vgic_v3_cpuif_trap)))) return -EINVAL; - /* We can only differ with NMI, and anything else is an error */ - val ^= read_id_reg(vcpu, rd); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI); - if (val) - return -EINVAL; - vcpu->kvm->arch.pfr1_nmi = nmi; - return 0; + /* + * KVM doesn't support MPAM but Commit 14e270fa5c4c ("arm64/cpufeature: + * Add remaining feature bits in ID_AA64PFR1 register") exposed the + * MPAM_frac field of AA64PFR1_EL1 to guests.Though we already hide + * MPAM in ID_AA64PFR0_EL1, need to hide this MPAM_frac field as well + * in order to take of systems with FEAT_MPAMv0p1 support. + * + * Similar to MPAM field in ID_AA64PFR0_EL1, on CPUs that support + * FEAT_MPAMv0p1, permit user-space to write the santisied value to + * ID_AA64PFR1_EL1.MPAM_frac, but ignore this field. + */ + if ((hw_val & mpamfrac_mask) == (val & mpamfrac_mask)) + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + + return set_id_reg(vcpu, rd, val); } /* @@ -1662,7 +1745,7 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, ret = arm64_check_features(vcpu, rd, val); if (!ret) - IDREG(vcpu->kvm, id) = val; + kvm_set_vm_id_reg(vcpu->kvm, id, val); mutex_unlock(&vcpu->kvm->arch.config_lock); @@ -1678,6 +1761,18 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return ret; } +void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val) +{ + u64 *p = __vm_id_reg(&kvm->arch, reg); + + lockdep_assert_held(&kvm->arch.config_lock); + + if (KVM_BUG_ON(kvm_vm_has_ran_once(kvm) || !p, kvm)) + return; + + *p = val; +} + static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 *val) { @@ -1697,7 +1792,7 @@ static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) return write_to_read_only(vcpu, p, r); - p->regval = read_sanitised_ftr_reg(SYS_CTR_EL0); + p->regval = kvm_read_vm_id_reg(vcpu->kvm, SYS_CTR_EL0); return true; } @@ -1875,11 +1970,14 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu, * from userspace. */ -/* sys_reg_desc initialiser for known cpufeature ID registers */ -#define ID_SANITISED(name) { \ +#define ID_DESC(name) \ SYS_DESC(SYS_##name), \ .access = access_id_reg, \ - .get_user = get_id_reg, \ + .get_user = get_id_reg \ + +/* sys_reg_desc initialiser for known cpufeature ID registers */ +#define ID_SANITISED(name) { \ + ID_DESC(name), \ .set_user = set_id_reg, \ .visibility = id_visibility, \ .reset = kvm_read_sanitised_id_reg, \ @@ -1888,15 +1986,22 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu, /* sys_reg_desc initialiser for known cpufeature ID registers */ #define AA32_ID_SANITISED(name) { \ - SYS_DESC(SYS_##name), \ - .access = access_id_reg, \ - .get_user = get_id_reg, \ + ID_DESC(name), \ .set_user = set_id_reg, \ .visibility = aa32_id_visibility, \ .reset = kvm_read_sanitised_id_reg, \ .val = 0, \ } +/* sys_reg_desc initialiser for writable ID registers */ +#define ID_WRITABLE(name, mask) { \ + ID_DESC(name), \ + .set_user = set_id_reg, \ + .visibility = id_visibility, \ + .reset = kvm_read_sanitised_id_reg, \ + .val = mask, \ +} + /* * sys_reg_desc initialiser for architecturally unallocated cpufeature ID * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 @@ -1918,9 +2023,7 @@ static unsigned int elx2_visibility(const struct kvm_vcpu *vcpu, * RAZ for the guest. */ #define ID_HIDDEN(name) { \ - SYS_DESC(SYS_##name), \ - .access = access_id_reg, \ - .get_user = get_id_reg, \ + ID_DESC(name), \ .set_user = set_id_reg, \ .visibility = raz_visibility, \ .reset = kvm_read_sanitised_id_reg, \ @@ -2089,7 +2192,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { .set_user = set_id_dfr0_el1, .visibility = aa32_id_visibility, .reset = read_sanitised_id_dfr0_el1, - .val = ID_DFR0_EL1_PerfMon_MASK, }, + .val = ID_DFR0_EL1_PerfMon_MASK | + ID_DFR0_EL1_CopDbg_MASK, }, ID_HIDDEN(ID_AFR0_EL1), AA32_ID_SANITISED(ID_MMFR0_EL1), AA32_ID_SANITISED(ID_MMFR1_EL1), @@ -2121,15 +2225,21 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ID_AA64PFR0_EL1), .access = access_id_reg, .get_user = get_id_reg, - .set_user = set_id_reg, + .set_user = set_id_aa64pfr0_el1, .reset = read_sanitised_id_aa64pfr0_el1, - .val = ID_AA64PFR0_EL1_CSV2_MASK | ID_AA64PFR0_EL1_CSV3_MASK, }, + .val = ~(ID_AA64PFR0_EL1_AMU | + ID_AA64PFR0_EL1_MPAM | + ID_AA64PFR0_EL1_SVE | + ID_AA64PFR0_EL1_RAS | + ID_AA64PFR0_EL1_GIC | + ID_AA64PFR0_EL1_AdvSIMD | + ID_AA64PFR0_EL1_FP), }, { SYS_DESC(SYS_ID_AA64PFR1_EL1), .access = access_id_reg, .get_user = get_id_reg, .set_user = set_id_aa64pfr1_el1, .reset = kvm_read_sanitised_id_reg, }, ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), - ID_SANITISED(ID_AA64ZFR0_EL1), + ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0), ID_HIDDEN(ID_AA64SMFR0_EL1), ID_UNALLOCATED(4,6), ID_UNALLOCATED(4,7), @@ -2140,7 +2250,22 @@ static const struct sys_reg_desc sys_reg_descs[] = { .get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, .reset = read_sanitised_id_aa64dfr0_el1, - .val = ID_AA64DFR0_EL1_PMUVer_MASK, }, + /* + * Prior to FEAT_Debugv8.9, the architecture defines context-aware + * breakpoints (CTX_CMPs) as the highest numbered breakpoints (BRPs). + * KVM does not trap + emulate the breakpoint registers, and as such + * cannot support a layout that misaligns with the underlying hardware. + * While it may be possible to describe a subset that aligns with + * hardware, just prevent changes to BRPs and CTX_CMPs altogether for + * simplicity. + * + * See DDI0487K.a, section D2.8.3 Breakpoint types and linking + * of breakpoints for more details. + */ + .val = ID_AA64DFR0_EL1_DoubleLock_MASK | + ID_AA64DFR0_EL1_WRPs_MASK | + ID_AA64DFR0_EL1_PMUVer_MASK | + ID_AA64DFR0_EL1_DebugVer_MASK, }, ID_SANITISED(ID_AA64DFR1_EL1), ID_UNALLOCATED(5,2), ID_UNALLOCATED(5,3), @@ -2150,9 +2275,15 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(5,7), /* CRm=6 */ - ID_SANITISED(ID_AA64ISAR0_EL1), - ID_SANITISED(ID_AA64ISAR1_EL1), - ID_SANITISED(ID_AA64ISAR2_EL1), + ID_WRITABLE(ID_AA64ISAR0_EL1, ~ID_AA64ISAR0_EL1_RES0), + ID_WRITABLE(ID_AA64ISAR1_EL1, ~(ID_AA64ISAR1_EL1_GPI | + ID_AA64ISAR1_EL1_GPA | + ID_AA64ISAR1_EL1_API | + ID_AA64ISAR1_EL1_APA)), + ID_WRITABLE(ID_AA64ISAR2_EL1, ~(ID_AA64ISAR2_EL1_RES0 | + ID_AA64ISAR2_EL1_MOPS | + ID_AA64ISAR2_EL1_APA3 | + ID_AA64ISAR2_EL1_GPA3)), ID_UNALLOCATED(6,3), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), @@ -2160,9 +2291,21 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(6,7), /* CRm=7 */ - ID_SANITISED(ID_AA64MMFR0_EL1), - ID_SANITISED(ID_AA64MMFR1_EL1), - ID_SANITISED(ID_AA64MMFR2_EL1), + ID_WRITABLE(ID_AA64MMFR0_EL1, ~(ID_AA64MMFR0_EL1_RES0 | + ID_AA64MMFR0_EL1_TGRAN4_2 | + ID_AA64MMFR0_EL1_TGRAN64_2 | + ID_AA64MMFR0_EL1_TGRAN16_2)), + ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 | + ID_AA64MMFR1_EL1_TWED | + ID_AA64MMFR1_EL1_XNX | + ID_AA64MMFR1_EL1_VH | + ID_AA64MMFR1_EL1_VMIDBits)), + ID_WRITABLE(ID_AA64MMFR2_EL1, ~(ID_AA64MMFR2_EL1_RES0 | + ID_AA64MMFR2_EL1_EVT | + ID_AA64MMFR2_EL1_FWB | + ID_AA64MMFR2_EL1_IDS | + ID_AA64MMFR2_EL1_NV | + ID_AA64MMFR2_EL1_CCIDX)), ID_SANITISED(ID_AA64MMFR3_EL1), ID_UNALLOCATED(7,4), ID_UNALLOCATED(7,5), @@ -2276,11 +2419,15 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, { SYS_DESC(SYS_CLIDR_EL1), access_clidr, reset_clidr, CLIDR_EL1, - .set_user = set_clidr }, + .set_user = set_clidr, .val = ~CLIDR_EL1_RES0 }, { SYS_DESC(SYS_CCSIDR2_EL1), undef_access }, { SYS_DESC(SYS_SMIDR_EL1), undef_access }, { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, - { SYS_DESC(SYS_CTR_EL0), access_ctr }, + ID_WRITABLE(CTR_EL0, CTR_EL0_DIC_MASK | + CTR_EL0_IDC_MASK | + CTR_EL0_DminLine_MASK | + CTR_EL0_L1Ip_MASK | + CTR_EL0_IminLine_MASK), { SYS_DESC(SYS_SVCR), undef_access }, { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, @@ -2555,14 +2702,15 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); - u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL1_EL3_SHIFT); - - p->regval = ((((dfr >> ID_AA64DFR0_EL1_WRPs_SHIFT) & 0xf) << 28) | - (((dfr >> ID_AA64DFR0_EL1_BRPs_SHIFT) & 0xf) << 24) | - (((dfr >> ID_AA64DFR0_EL1_CTX_CMPs_SHIFT) & 0xf) << 20) - | (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12)); + u64 dfr = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1); + u64 pfr = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1); + u32 el3 = !!SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr); + + p->regval = ((SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr) << 28) | + (SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr) << 24) | + (SYS_FIELD_GET(ID_AA64DFR0_EL1, CTX_CMPs, dfr) << 20) | + (SYS_FIELD_GET(ID_AA64DFR0_EL1, DebugVer, dfr) << 16) | + (1 << 15) | (el3 << 14) | (el3 << 12)); return true; } } @@ -3394,8 +3542,8 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id, */ #define FUNCTION_INVARIANT(reg) \ - static u64 get_##reg(struct kvm_vcpu *v, \ - const struct sys_reg_desc *r) \ + static u64 reset_##reg(struct kvm_vcpu *v, \ + const struct sys_reg_desc *r) \ { \ ((struct sys_reg_desc *)r)->val = read_sysreg(reg); \ return ((struct sys_reg_desc *)r)->val; \ @@ -3405,18 +3553,11 @@ FUNCTION_INVARIANT(midr_el1) FUNCTION_INVARIANT(revidr_el1) FUNCTION_INVARIANT(aidr_el1) -static u64 get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r) -{ - ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0); - return ((struct sys_reg_desc *)r)->val; -} - /* ->val is filled in by kvm_sys_reg_table_init() */ static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = { - { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, - { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 }, - { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 }, - { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 }, + { SYS_DESC(SYS_MIDR_EL1), NULL, reset_midr_el1 }, + { SYS_DESC(SYS_REVIDR_EL1), NULL, reset_revidr_el1 }, + { SYS_DESC(SYS_AIDR_EL1), NULL, reset_aidr_el1 }, }; static int get_invariant_sys_reg(u64 id, u64 __user *uaddr) @@ -3698,6 +3839,56 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } +#define KVM_ARM_FEATURE_ID_RANGE_INDEX(r) \ + KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(r), \ + sys_reg_Op1(r), \ + sys_reg_CRn(r), \ + sys_reg_CRm(r), \ + sys_reg_Op2(r)) + +static bool is_feature_id_reg(u32 encoding) +{ + return (sys_reg_Op0(encoding) == 3 && + (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) && + sys_reg_CRn(encoding) == 0 && + sys_reg_CRm(encoding) <= 7); +} + +int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range) +{ + const void *zero_page = page_to_virt(ZERO_PAGE(0)); + u64 __user *masks = (u64 __user *)range->addr; + + /* Only feature id range is supported, reserved[13] must be zero. */ + if (range->range || + memcmp(range->reserved, zero_page, sizeof(range->reserved))) + return -EINVAL; + + /* Wipe the whole thing first */ + if (clear_user(masks, KVM_ARM_FEATURE_ID_RANGE_SIZE * sizeof(__u64))) + return -EFAULT; + + for (int i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) { + const struct sys_reg_desc *reg = &sys_reg_descs[i]; + u32 encoding = reg_to_encoding(reg); + u64 val; + + if (!is_feature_id_reg(encoding) || !reg->set_user) + continue; + + if (!reg->val || + (is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0())) { + continue; + } + val = reg->val; + + if (put_user(val, (masks + KVM_ARM_FEATURE_ID_RANGE_INDEX(encoding)))) + return -EFAULT; + } + + return 0; +} + int __init kvm_sys_reg_table_init(void) { struct sys_reg_params params; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index f97ab651ccbb7aa89d9127fcde026c4f375803f4..cab3566ea863a2110a0dfcda0f3d0ad29d0b7ffd 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1201,6 +1201,8 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 +#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 231 + #define KVM_CAP_SEV_ES_GHCB 500 #define KVM_CAP_HYGON_COCO_EXT 501 /* support userspace to request firmware to build CSV3 guest's memory space */ @@ -1625,6 +1627,7 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) /* Available with KVM_CAP_COUNTER_OFFSET */ #define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) +#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO, 0xb6, struct reg_mask_range) /* ioctl for SW vcpu init*/ #define KVM_SW64_VCPU_INIT _IO(KVMIO, 0xba)