diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 396ad4bcd8cf0d7904f46369bd507b093ebbd7b7..4501242385638b1314810cf615ab73dd2868cf86 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -818,7 +818,8 @@ static __always_inline bool system_uses_irq_prio_masking(void)
 static __always_inline bool system_uses_nmi(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_NMI) &&
-		cpus_have_const_cap(ARM64_USES_NMI);
+		cpus_have_const_cap(ARM64_USES_NMI) &&
+		!system_uses_irq_prio_masking();
 }
 
 static inline bool system_supports_mte(void)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8c64fa87279b9b43bf826272d21e29e222dd9b0a..abe581982a7597243b0c1730bb8e27bcbbec04e1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -209,6 +209,8 @@ struct kvm_arch {
 	/* VTCR_EL2 value for this VM */
 	u64 vtcr;
 
+	u8 pfr1_nmi;
+
 	/* Interrupt controller */
 	struct vgic_dist vgic;
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 0086a617dfa129c84a3b4f8308adbea35592e8b8..42358b8d678e070a25b8b0a9b564fe2e1829a82b 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -300,6 +300,8 @@
 #define SYS_SPSR_EL1		sys_reg(3, 0, 4, 0, 0)
 #define SYS_ELR_EL1		sys_reg(3, 0, 4, 0, 1)
 
+#define SYS_ALLINT		sys_reg(3, 0, 4, 3, 0)
+
 #define SYS_ICC_PMR_EL1		sys_reg(3, 0, 4, 6, 0)
 
 #define SYS_AFSR0_EL1		sys_reg(3, 0, 5, 1, 0)
@@ -932,6 +934,7 @@
 #define ICH_LR_VIRTUAL_ID_MASK	((1ULL << 32) - 1)
 #define ICH_LR_EOI		(1ULL << 41)
+#define ICH_LR_NMI		(1ULL << 59)
 #define ICH_LR_GROUP		(1ULL << 60)
 #define ICH_LR_HW		(1ULL << 61)
 #define ICH_LR_STATE		(3ULL << 62)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 316c57b19c9344fa2eaa7878ca4d52adfb3b18f9..dccf8c3a23bb1953291f1ff2fa7e8339e4ee06a9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2189,20 +2189,24 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry
 }
 #endif
 
-#ifdef CONFIG_ARM64_NMI
 static bool use_nmi(const struct arm64_cpu_capabilities *entry, int scope)
 {
 	if (!has_cpuid_feature(entry, scope))
 		return false;
 
 	/*
+	 * NMI support was not enabled in the kernel, but can still be
+	 * used by guests. Let the world know.
+	 *
 	 * Having both real and pseudo NMIs enabled simultaneously is
 	 * likely to cause confusion. Since pseudo NMIs must be
 	 * enabled with an explicit command line option, if the user
	 * has set that option on a system with real NMIs for some
 	 * reason assume they know what they're doing.
 	 */
-	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && enable_pseudo_nmi) {
+	if (!IS_ENABLED(CONFIG_ARM64_NMI))
+		pr_info("CONFIG_ARM64_NMI disabled, using NMIs for guests only\n");
+	else if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && enable_pseudo_nmi) {
 		pr_info("Pseudo NMI enabled, not using architected NMI\n");
 		return false;
 	}
@@ -2210,6 +2214,7 @@ static bool use_nmi(const struct arm64_cpu_capabilities *entry, int scope)
 	return true;
 }
 
+#ifdef CONFIG_ARM64_NMI
 static void nmi_enable(const struct arm64_cpu_capabilities *__unused)
 {
 	/*
@@ -2821,7 +2826,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
 	},
-#ifdef CONFIG_ARM64_NMI
 	{
 		.desc = "Non-maskable Interrupts present",
 		.capability = ARM64_HAS_NMI,
@@ -2843,9 +2847,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_width = 4,
 		.min_field_value = ID_AA64PFR1_EL1_NMI_IMP,
 		.matches = use_nmi,
+#ifdef CONFIG_ARM64_NMI
 		.cpu_enable = nmi_enable,
-	},
 #endif
+	},
 #ifdef CONFIG_ARM64_MPAM
 	{
 		.desc = "Memory Partitioning And Monitoring",
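The cpufeature.c hunks above are the interesting part: detection (ARM64_HAS_NMI) is no longer compiled out with CONFIG_ARM64_NMI, so a host built without the option can still hand FEAT_NMI to its guests, and only an explicit pseudo-NMI request on the command line defeats it. A minimal user-space sketch of the resulting decision, with hypothetical names standing in for the kernel predicates:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the kernel-side inputs. */
    struct nmi_inputs {
        bool hw_has_nmi;         /* ID_AA64PFR1_EL1.NMI == IMP */
        bool config_arm64_nmi;   /* CONFIG_ARM64_NMI */
        bool pseudo_nmi_enabled; /* pseudo NMI requested on the command line */
    };

    /* Mirrors the shape of use_nmi(): does ARM64_HAS_NMI match? */
    static bool has_nmi_cap(const struct nmi_inputs *in)
    {
        if (!in->hw_has_nmi)
            return false;
        if (!in->config_arm64_nmi)
            return true;  /* detected, but for guest use only */
        if (in->pseudo_nmi_enabled)
            return false; /* the user asked for pseudo NMI; respect that */
        return true;
    }

    int main(void)
    {
        struct nmi_inputs in = { true, false, false };

        printf("ARM64_HAS_NMI matches: %d (guest-only)\n", has_nmi_cap(&in));
        return 0;
    }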
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 7f6de758589413fa209f39efa6fb8ddf2027faba..c71ef028dde66a2da422ea24430b9bc985b51fd6 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -192,6 +192,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	/* The maximum number of VCPUs is limited by the host's GIC model */
 	kvm->max_vcpus = kvm_arm_default_max_vcpus();
 
+	if (cpus_have_const_cap(ARM64_HAS_NMI) && !static_branch_unlikely(&vgic_v3_cpuif_trap))
+		kvm->arch.pfr1_nmi = ID_AA64PFR1_EL1_NMI_IMP;
+
 	kvm_arm_init_hypercalls(kvm);
 
 	bitmap_zero(kvm->arch.vcpu_features, KVM_VCPU_MAX_FEATURES);
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 0a7f049bcf916dc2d1557d3623e1ed20a05419eb..821d803b395d192279ebdafde658bc4e9409257e 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -224,7 +224,8 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
 		vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
 	}
 
-	if (cpus_have_final_cap(ARM64_HAS_NMI))
+	if (cpus_have_final_cap(ARM64_HAS_NMI) &&
+	    !kern_hyp_va(vcpu->kvm)->arch.pfr1_nmi)
 		sysreg_clear_set_s(SYS_HCRX_EL2, 0, HCRX_EL2_TALLINT);
 
 	vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
@@ -252,7 +253,8 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
 {
 	write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
 
-	if (cpus_have_final_cap(ARM64_HAS_NMI))
+	if (cpus_have_final_cap(ARM64_HAS_NMI) &&
+	    !kern_hyp_va(vcpu->kvm)->arch.pfr1_nmi)
 		sysreg_clear_set_s(SYS_HCRX_EL2, HCRX_EL2_TALLINT, 0);
 
 	write_sysreg(0, hstr_el2);
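The switch.h hunks key the HCRX_EL2.TALLINT trap off what the VM was told: a guest whose ID_AA64PFR1_EL1.NMI reads as zero gets its ALLINT accesses trapped (and turned into UNDEFs by the sys_regs table further down), while a guest that was given NMI runs untrapped. A sketch of that gating, assuming TALLINT is bit 6 of HCRX_EL2 as architected for FEAT_NMI:

    #include <stdbool.h>
    #include <stdint.h>

    #define HCRX_EL2_TALLINT (UINT64_C(1) << 6) /* assumed bit position */

    /* Entry path: trap ALLINT only for a guest without FEAT_NMI. */
    static uint64_t activate_traps(uint64_t hcrx, bool host_has_nmi,
                                   uint8_t guest_pfr1_nmi)
    {
        if (host_has_nmi && !guest_pfr1_nmi)
            hcrx |= HCRX_EL2_TALLINT;
        return hcrx;
    }

    /* Exit path: drop the trap so the host's own ALLINT use is unaffected. */
    static uint64_t deactivate_traps(uint64_t hcrx, bool host_has_nmi,
                                     uint8_t guest_pfr1_nmi)
    {
        if (host_has_nmi && !guest_pfr1_nmi)
            hcrx &= ~HCRX_EL2_TALLINT;
        return hcrx;
    }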
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 6cb638b184b1832ea43fa146bd5816dce73aa8e0..7a9e132f68dbe8d4580aa14bf67a27b6b5a490bc 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -130,7 +130,11 @@ static void __vgic_v3_write_ap0rn(u32 val, int n)
 	}
 }
 
-static void __vgic_v3_write_ap1rn(u32 val, int n)
+/*
+ * Contrary to ICH_AP0Rn_EL2, ICH_AP1R0_EL2 is 64bit, thanks to the
+ * NMI bit stuck at [63]. Isn't that fun?
+ */
+static void __vgic_v3_write_ap1rn(u64 val, int n)
 {
 	switch (n) {
 	case 0:
@@ -172,9 +176,10 @@ static u32 __vgic_v3_read_ap0rn(int n)
 	return val;
 }
 
-static u32 __vgic_v3_read_ap1rn(int n)
+/* Same remark about the 64bit-ness of AP1R0 */
+static u64 __vgic_v3_read_ap1rn(int n)
 {
-	u32 val;
+	u64 val;
 
 	switch (n) {
 	case 0:
@@ -1043,6 +1048,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
 			return 0;
 		fn = __vgic_v3_read_iar;
 		break;
+	case SYS_ICC_NMIAR1_EL1:
+		/* Here's an UNDEF for you */
+		return 0;
 	case SYS_ICC_EOIR0_EL1:
 	case SYS_ICC_EOIR1_EL1:
 		if (unlikely(is_read))
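The widening above is not cosmetic: with GICv3.3 NMIs, ICH_AP1R0_EL2 carries the NMI active-priority state in bit [63] (per the comment in the patch), so a u32 save/restore path would silently drop an in-progress NMI. A self-contained illustration of the failure mode:

    #include <stdint.h>
    #include <stdio.h>

    #define ICH_AP1R0_NMI (UINT64_C(1) << 63) /* NMI active-priority bit */

    int main(void)
    {
        uint64_t ap1r0 = ICH_AP1R0_NMI | 0x1; /* active NMI + one priority bit */

        uint32_t lossy = (uint32_t)ap1r0; /* the old u32 accessor */
        uint64_t exact = ap1r0;           /* the widened u64 accessor */

        printf("u32 path loses the NMI bit: %#llx\n", (unsigned long long)lossy);
        printf("u64 path preserves it:      %#llx\n", (unsigned long long)exact);
        return 0;
    }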
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b4608d1b08b5f6d6f83dce3419397acebf32a1a3..dc8663759b0e1e3c45cd1b401f38f4d4ac31b70e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -411,29 +411,21 @@ static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-static bool trap_mpam(struct kvm_vcpu *vcpu,
-		      struct sys_reg_params *p,
-		      const struct sys_reg_desc *r)
+static bool workaround_bad_mpam_abi(struct kvm_vcpu *vcpu,
+				    struct sys_reg_params *p,
+				    const struct sys_reg_desc *r)
 {
-	u64 aa64pfr0_el1 = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1);
-
 	/*
-	 * What did we expose to the guest?
-	 * Earlier guests may have seen the ID bits, which can't be removed
-	 * without breaking migration, but MPAMIDR_EL1 can advertise all-zeroes,
-	 * indicating there are zero PARTID/PMG supported by the CPU, allowing
-	 * the other two trapped registers (MPAM1_EL1 and MPAM0_EL1) to be
-	 * treated as RAZ/WI.
+	 * The ID register can't be removed without breaking migration,
+	 * but MPAMIDR_EL1 can advertise all-zeroes, indicating there are zero
+	 * PARTID/PMG supported by the CPU, allowing the other two trapped
+	 * registers (MPAM1_EL1 and MPAM0_EL1) to be treated as RAZ/WI.
 	 * Emulating MPAM1_EL1 as RAZ/WI means the guest sees the MPAMEN bit
 	 * as clear, and realises MPAM isn't usable on this CPU.
 	 */
-	if (FIELD_GET(ID_AA64PFR0_EL1_MPAM_MASK, aa64pfr0_el1)) {
-		p->regval = 0;
-		return true;
-	}
+	p->regval = 0;
 
-	kvm_inject_undefined(vcpu);
-	return false;
+	return true;
 }
 
 static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
@@ -1253,36 +1245,6 @@ static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp,
 	return arm64_ftr_safe_value(&kvm_ftr, new, cur);
 }
 
-static u64 kvm_arm64_ftr_max(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
-{
-	u64 pfr0, val = rd->reset(vcpu, rd);
-	u32 field, id = reg_to_encoding(rd);
-
-	/*
-	 * Some values may reset to a lower value than can be supported,
-	 * get the maximum feature value.
-	 */
-	switch (id) {
-	case SYS_ID_AA64PFR0_EL1:
-		pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
-
-		/*
-		 * MPAM resets to 0, but migration of MPAM=1 guests is needed.
-		 * See trap_mpam() for more.
-		 */
-		field = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT);
-		if (field == ID_AA64PFR0_EL1_MPAM_1) {
-			val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-			val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, ID_AA64PFR0_EL1_MPAM_1);
-		}
-
-		break;
-	}
-
-	return val;
-}
-
 /**
  * arm64_check_features() - Check if a feature register value constitutes
  * a subset of features indicated by the idreg's KVM sanitised limit.
@@ -1303,7 +1265,8 @@ static int arm64_check_features(struct kvm_vcpu *vcpu,
 	const struct arm64_ftr_bits *ftrp = NULL;
 	u32 id = reg_to_encoding(rd);
 	u64 writable_mask = rd->val;
-	u64 limit, mask = 0;
+	u64 limit = rd->reset(vcpu, rd);
+	u64 mask = 0;
 
 	/*
 	 * Hidden and unallocated ID registers may not have a corresponding
@@ -1317,7 +1280,6 @@ static int arm64_check_features(struct kvm_vcpu *vcpu,
 	if (!ftr_reg)
 		return -EINVAL;
 
-	limit = kvm_arm64_ftr_max(vcpu, rd);
 	ftrp = ftr_reg->ftr_bits;
 
 	for (; ftrp && ftrp->width; ftrp++) {
@@ -1380,6 +1342,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
 			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
 
 		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI), vcpu->kvm->arch.pfr1_nmi);
 		break;
 	case SYS_ID_AA64ISAR1_EL1:
 		if (!vcpu_has_ptrauth(vcpu))
@@ -1522,14 +1485,6 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
 
 	val &= ~ID_AA64PFR0_EL1_AMU_MASK;
 
-	/*
-	 * MPAM is disabled by default as KVM also needs a set of PARTID to
-	 * program the MPAMVPMx_EL2 PARTID remapping registers with. But some
-	 * older kernels let the guest see the ID bit. Turning it on causes
-	 * the registers to be emulated as RAZ/WI. See trap_mpam() for more.
-	 */
-	val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
-
 	return val;
 }
 
@@ -1618,6 +1573,28 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
 	return set_id_reg(vcpu, rd, val);
 }
 
+static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu,
+			       const struct sys_reg_desc *rd,
+			       u64 val)
+{
+	u8 nmi;
+
+	nmi = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR1_EL1_NMI_SHIFT);
+	if (nmi > ID_AA64PFR1_EL1_NMI_IMP ||
+	    (nmi && (!cpus_have_const_cap(ARM64_HAS_NMI) || static_branch_unlikely(&vgic_v3_cpuif_trap))))
+		return -EINVAL;
+
+	/* We can only differ with NMI, and anything else is an error */
+	val ^= read_id_reg(vcpu, rd);
+	val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_NMI);
+	if (val)
+		return -EINVAL;
+
+	vcpu->kvm->arch.pfr1_nmi = nmi;
+
+	return 0;
+}
+
 /*
  * cpufeature ID register user accessors
 *
@@ -1981,6 +1958,9 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
 * guest...
 */
 static const struct sys_reg_desc sys_reg_descs[] = {
+	{ SYS_DESC(SYS_ALLINT_CLR), undef_access },
+	{ SYS_DESC(SYS_ALLINT_SET), undef_access },
+
 	{ SYS_DESC(SYS_DC_ISW), access_dcsw },
 	{ SYS_DESC(SYS_DC_IGSW), access_dcgsw },
 	{ SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
@@ -2079,7 +2059,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  .set_user = set_id_reg,
 	  .reset = read_sanitised_id_aa64pfr0_el1,
 	  .val = ID_AA64PFR0_EL1_CSV2_MASK | ID_AA64PFR0_EL1_CSV3_MASK, },
-	ID_SANITISED(ID_AA64PFR1_EL1),
+	{ SYS_DESC(SYS_ID_AA64PFR1_EL1), .access = access_id_reg,
+	  .get_user = get_id_reg, .set_user = set_id_aa64pfr1_el1,
+	  .reset = kvm_read_sanitised_id_reg, },
 	ID_UNALLOCATED(4,2),
 	ID_UNALLOCATED(4,3),
 	ID_SANITISED(ID_AA64ZFR0_EL1),
@@ -2147,6 +2129,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_SPSR_EL1), access_spsr},
 	{ SYS_DESC(SYS_ELR_EL1), access_elr},
 
+	{ SYS_DESC(SYS_ALLINT), undef_access },
+
 	{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
 	{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
 	{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
@@ -2194,17 +2178,18 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_LOREA_EL1), trap_loregion },
 	{ SYS_DESC(SYS_LORN_EL1), trap_loregion },
 	{ SYS_DESC(SYS_LORC_EL1), trap_loregion },
-	{ SYS_DESC(SYS_MPAMIDR_EL1), trap_mpam },
+	{ SYS_DESC(SYS_MPAMIDR_EL1), workaround_bad_mpam_abi },
 	{ SYS_DESC(SYS_LORID_EL1), trap_loregion },
 
-	{ SYS_DESC(SYS_MPAM1_EL1), trap_mpam },
-	{ SYS_DESC(SYS_MPAM0_EL1), trap_mpam },
+	{ SYS_DESC(SYS_MPAM1_EL1), workaround_bad_mpam_abi },
+	{ SYS_DESC(SYS_MPAM0_EL1), workaround_bad_mpam_abi },
 	{ SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
 	{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
 
 	{ SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
 	{ SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only },
+	{ SYS_DESC(SYS_ICC_NMIAR1_EL1), undef_access },
 	{ SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
 	{ SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
 	{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
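set_id_aa64pfr1_el1() enforces "only the NMI field is writable" with an XOR trick: difference the proposed value against the current register, clear the one writable field from the diff, and any leftover bit is an attempt to change a read-only field. A user-space sketch, assuming the ID_AA64PFR1_EL1.NMI field sits at bits [39:36]:

    #include <stdint.h>
    #include <stdio.h>

    #define NMI_SHIFT 36 /* assumed ID_AA64PFR1_EL1.NMI offset */
    #define NMI_MASK  (UINT64_C(0xf) << NMI_SHIFT)

    /*
     * XOR against the current value, clear the one writable field,
     * and any remaining difference is a write to a read-only field.
     */
    static int check_write(uint64_t cur, uint64_t new)
    {
        uint64_t diff = cur ^ new;

        diff &= ~NMI_MASK;
        return diff ? -1 /* -EINVAL */ : 0;
    }

    int main(void)
    {
        printf("%d\n", check_write(0, UINT64_C(1) << NMI_SHIFT)); /*  0: NMI only */
        printf("%d\n", check_write(0, 1));                        /* -1: other field */
        return 0;
    }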
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 20fed597531b09d6db7af08a578e7dd3ad29fb6c..340f960a11548e79fa78d658b501dc601e29dea1 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -155,7 +155,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
 	seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
 	seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
-	seq_printf(s, "G=group\n");
+	seq_printf(s, "G=group, N=NMI\n");
 }
 
 static void print_header(struct seq_file *s, struct vgic_irq *irq,
@@ -170,8 +170,8 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
 	}
 
 	seq_printf(s, "\n");
-	seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHCG HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
-	seq_printf(s, "----------------------------------------------------------------\n");
+	seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHCGN HWID TARGET SRC PRI VCPU_ID\n", hdr, id);
+	seq_printf(s, "-----------------------------------------------------------------\n");
 }
 
 static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
@@ -204,7 +204,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 	seq_printf(s, "       %s %4d "
 		      "    %2d "
-		      "%d%d%d%d%d%d%d "
+		      "%d%d%d%d%d%d%d%d "
 		      "%8d "
 		      "%8x "
 		      " %2x "
@@ -220,6 +220,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 		      irq->hw,
 		      irq->config == VGIC_CONFIG_LEVEL,
 		      irq->group,
+		      irq->nmi,
 		      irq->hwintid,
 		      irq->mpidr,
 		      irq->source,
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 5ca9d945e3a2dea33df16f43c92e5a51cfcdd312..0d9d71dc338eecc1cb9cfd4b304dc4918a18a67b 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -353,8 +353,16 @@ int vgic_init(struct kvm *kvm)
 	 * If userspace didn't set the GIC implementation revision,
 	 * default to the latest and greatest. You know want it.
 	 */
-	if (!dist->implementation_rev)
+	if (!dist->implementation_rev) {
 		dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST;
+		/*
+		 * Advertise NMI if available. Userspace that explicitly
+		 * doesn't want NMI will have written to GICD_{IIDR,TYPER}
+		 * to set the implementation and the NMI support status.
+		 */
+		dist->has_nmi = kvm_vgic_global_state.has_nmi;
+	}
+
 	dist->initialized = true;
 
 out:
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
index e070cda86e12ffeb12a94284cc34e93d55cf4353..365d83b0f263f64af8cdeaad9b30bf1c7eef7a2e 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c
@@ -95,6 +95,7 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
 		switch (reg) {
 		case KVM_VGIC_IMP_REV_2:
 		case KVM_VGIC_IMP_REV_3:
+		case KVM_VGIC_IMP_REV_4:
 			vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
 			dist->implementation_rev = reg;
 			return 0;
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index dde858daa7d74952228561fe3787aa9b0cd35e40..3dd779cae09f430fb3fe283f7cf204b416e25cb8 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -78,6 +78,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 	case GICD_TYPER:
 		value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
 		value = (value >> 5) - 1;
+		if (vgic->has_nmi)
+			value |= GICD_TYPER_NMI;
 		if (vgic_has_its(vcpu->kvm)) {
 			value |= (INTERRUPT_ID_BITS_ITS - 1) << 19;
 			value |= GICD_TYPER_LPIS;
@@ -158,6 +160,13 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
 	u32 reg;
 
 	switch (addr & 0x0c) {
+	case GICD_TYPER:
+		if (dist->implementation_rev >= KVM_VGIC_IMP_REV_4 &&
+		    kvm_vgic_global_state.has_nmi)
+			dist->has_nmi = val & GICD_TYPER_NMI;
+		else if (val & GICD_TYPER_NMI)
+			return -EINVAL;
+		return 0;
 	case GICD_TYPER2:
 		if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
 			return -EINVAL;
@@ -171,6 +180,10 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
 		switch (reg) {
 		case KVM_VGIC_IMP_REV_2:
 		case KVM_VGIC_IMP_REV_3:
+			/* Disable NMI on selecting an older revision */
+			dist->has_nmi = false;
+			fallthrough;
+		case KVM_VGIC_IMP_REV_4:
 			dist->implementation_rev = reg;
 			return 0;
 		default:
@@ -186,7 +199,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
 		return 0;
 	}
 
-	vgic_mmio_write_v3_misc(vcpu, addr, len, val);
+	/* Not reachable... */
 	return 0;
 }
 
@@ -598,6 +611,55 @@ static void vgic_mmio_write_invall(struct kvm_vcpu *vcpu,
 	vgic_set_rdist_busy(vcpu, false);
 }
 
+static unsigned long vgic_mmio_read_nmi(struct kvm_vcpu *vcpu,
+					gpa_t addr, unsigned int len)
+{
+	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+	u32 value = 0;
+	int i;
+
+	/* Loop over all IRQs affected by this read */
+	for (i = 0; i < len * 8; i++) {
+		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+		if (irq->nmi)
+			value |= (1U << i);
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+
+	return value;
+}
+
+static void vgic_mmio_write_nmi(struct kvm_vcpu *vcpu, gpa_t addr,
+				unsigned int len, unsigned long val)
+{
+	u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+	unsigned long flags;
+	int i;
+
+	if (!vcpu->kvm->arch.vgic.has_nmi)
+		return;
+
+	for (i = 0; i < len * 8; i++) {
+		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+		bool was_nmi;
+
+		raw_spin_lock_irqsave(&irq->irq_lock, flags);
+
+		was_nmi = irq->nmi;
+		irq->nmi = (val & BIT(i));
+
+		if (irq->hw && vgic_irq_is_sgi(irq->intid) &&
+		    was_nmi != irq->nmi)
+			vgic_update_vsgi(irq);
+
+		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+		vgic_put_irq(vcpu->kvm, irq);
+	}
+}
+
 /*
 * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
 * redistributors, while SPIs are covered by registers in the distributor
@@ -671,6 +733,9 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
 	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR,
 		vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1,
 		VGIC_ACCESS_32bit),
+	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_INMIR,
+		vgic_mmio_read_nmi, vgic_mmio_write_nmi, NULL, NULL, 1,
+		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER,
 		vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
@@ -755,6 +820,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
 	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR,
 		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
 		VGIC_ACCESS_32bit),
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_INMIR0,
+		vgic_mmio_read_nmi, vgic_mmio_write_nmi, 4,
+		VGIC_ACCESS_32bit),
 };
 
 unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
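Together with the vgic-init.c and vgic-mmio-v2.c hunks, this gives userspace a negotiation path: restoring GICD_IIDR with an older revision drops NMI support, and a VMM that keeps KVM_VGIC_IMP_REV_4 can still clear GICD_TYPER.NMI through the uaccess path before the vgic is initialised. A rough VMM-side sketch of the opt-out, assuming GICD_TYPER.NMI is bit 9 and going through the existing KVM_DEV_ARM_VGIC_GRP_DIST_REGS interface:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define GICD_TYPER     0x0004
    #define GICD_TYPER_NMI (1U << 9) /* assumed bit position */

    /* Keep the latest vGIC revision but opt out of NMI before vgic_init(). */
    static int vgic_opt_out_of_nmi(int vgic_fd)
    {
        uint32_t typer;
        struct kvm_device_attr attr = {
            .group = KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
            .attr  = GICD_TYPER, /* MPIDR 0, register offset 0x4 */
            .addr  = (uint64_t)(unsigned long)&typer,
        };

        if (ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr))
            return -1;

        typer &= ~GICD_TYPER_NMI;
        return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
    }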
diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c
index 7aca296f9b3f8fab4d62768335fb015b94652c2c..59b81e5ce1d8a3cb5ff11b0261a6db90dda7d6f4 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio.c
@@ -61,9 +61,11 @@ unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu,
 	return value;
 }
 
-static void vgic_update_vsgi(struct vgic_irq *irq)
+void vgic_update_vsgi(struct vgic_irq *irq)
 {
-	WARN_ON(its_prop_update_vsgi(irq->host_irq, irq->priority, irq->group));
+	WARN_ON(its_prop_update_vsgi(irq->host_irq,
+				     irq->nmi ? 0 : irq->priority,
+				     irq->group, irq->nmi));
 }
 
 void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
@@ -699,13 +701,17 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
 			      gpa_t addr, unsigned int len)
 {
 	u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
+	unsigned long flags;
 	int i;
 	u64 val = 0;
 
 	for (i = 0; i < len; i++) {
 		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 
-		val |= (u64)irq->priority << (i * 8);
+		raw_spin_lock_irqsave(&irq->irq_lock, flags);
+		if (!irq->nmi)
+			val |= (u64)irq->priority << (i * 8);
+		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 
 		vgic_put_irq(vcpu->kvm, irq);
 	}
@@ -732,10 +738,15 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
 		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 
 		raw_spin_lock_irqsave(&irq->irq_lock, flags);
-		/* Narrow the priority range to what we actually support */
-		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
-		if (vgic_direct_sgi_or_ppi(irq))
-			vgic_update_vsgi(irq);
+		if (!irq->nmi) {
+			/*
+			 * Narrow the priority range to what we
+			 * actually support
+			 */
+			irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
+			if (vgic_direct_sgi_or_ppi(irq))
+				vgic_update_vsgi(irq);
+		}
 		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 
 		vgic_put_irq(vcpu->kvm, irq);
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index c71fc1a70e24a4c520c57a1deade073e6fc5f37a..69ca111e349d95355595a3a6f69998d2c8f8316d 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -181,7 +181,11 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (irq->group)
 		val |= ICH_LR_GROUP;
 
-	val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
+	if (vcpu->kvm->arch.pfr1_nmi == ID_AA64PFR1_EL1_NMI_IMP &&
+	    irq->nmi)
+		val |= ICH_LR_NMI;
+	else
+		val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
 
 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
 }
@@ -720,6 +724,12 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
 		static_branch_enable(&vgic_v3_cpuif_trap);
 	}
 
+	if (info->has_nmi) {
+		kvm_vgic_global_state.has_nmi = !static_branch_unlikely(&vgic_v3_cpuif_trap);
+		kvm_info("GICv3 NMI support %s\n",
+			 kvm_vgic_global_state.has_nmi ? "enabled" : "disabled due to trapping");
+	}
+
 	kvm_vgic_global_state.vctrl_base = NULL;
 	kvm_vgic_global_state.type = VGIC_V3;
 	kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 1d02c50593d9d85fa2f9587734d44540d3b32381..06fec5cea3d7f575338b0a3ecd2d24bfecda61fc 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -110,6 +110,7 @@ static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq)
 	vpe->sgi_config[irq->intid].enabled = irq->enabled;
 	vpe->sgi_config[irq->intid].group = irq->group;
 	vpe->sgi_config[irq->intid].priority = irq->priority;
+	vpe->sgi_config[irq->intid].nmi = irq->nmi;
 }
 
 static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu)
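In the list register an NMI is tagged rather than prioritised: vgic_v3_populate_lr() sets ICH_LR_NMI (bit 59, from the sysreg.h hunk) instead of filling the priority field, and only does so when the VM actually advertises FEAT_NMI. A compact sketch of the encoding, with the priority field at [55:48] as architected:

    #include <stdbool.h>
    #include <stdint.h>

    #define ICH_LR_PRIORITY_SHIFT 48
    #define ICH_LR_NMI   (UINT64_C(1) << 59)
    #define ICH_LR_GROUP (UINT64_C(1) << 60)

    /* An NMI gets the tag bit; everything else gets a priority. */
    static uint64_t encode_lr(bool vm_has_nmi, bool irq_is_nmi,
                              uint8_t prio, bool group)
    {
        uint64_t val = 0;

        if (group)
            val |= ICH_LR_GROUP;

        if (vm_has_nmi && irq_is_nmi)
            val |= ICH_LR_NMI;
        else
            val |= (uint64_t)prio << ICH_LR_PRIORITY_SHIFT;

        return val;
    }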
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 6e1fa6f2918ffbd2b75f2d72ff6060bc84ccde4c..2459b0adea0863761f52b312950f32f07e289954 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -252,6 +252,7 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
 *
 * Otherwise things should be sorted by the priority field and the GIC
 * hardware support will take care of preemption of priority groups etc.
+ * NMI acts as a super-priority.
 *
 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 * to sort "b" before "a".
@@ -287,7 +288,12 @@ static int vgic_irq_cmp(void *priv, const struct list_head *a,
 		goto out;
 	}
 
-	/* Both pending and enabled, sort by priority */
+	/* Both pending and enabled, sort by NMI and then priority */
+	if (irqa->nmi != irqb->nmi) {
+		ret = (int)irqb->nmi - (int)irqa->nmi;
+		goto out;
+	}
+
 	ret = irqa->priority - irqb->priority;
 
 out:
 	raw_spin_unlock(&irqb->irq_lock);
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 764038fadf3fd68477fc32da6fee256fa1602c75..e38bf7d94d3bb19267429d4061d0d18e1e7bac73 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -352,6 +352,7 @@ bool vgic_supports_direct_msis(struct kvm *kvm);
 int vgic_v4_init(struct kvm *kvm);
 void vgic_v4_teardown(struct kvm *kvm);
 void vgic_v4_configure_vsgis(struct kvm *kvm);
+void vgic_update_vsgi(struct vgic_irq *irq);
 void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val);
 int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq);
 #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS
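The comparator change above makes NMI an explicit first sort key for the ap_list, so no plain interrupt can be ordered ahead of a pending NMI however good its priority. A stand-alone rendering of the ordering rule:

    #include <stdbool.h>

    struct virq {
        bool nmi;
        unsigned char priority; /* lower value == higher priority */
    };

    /* NMI compares first; priority only breaks ties. Negative sorts "a" first. */
    static int virq_cmp(const struct virq *a, const struct virq *b)
    {
        if (a->nmi != b->nmi)
            return (int)b->nmi - (int)a->nmi; /* the NMI sorts first */

        return a->priority - b->priority;
    }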
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index c66676a21ad012ca44177fa16cbc14dc4451831b..72a27a018e5c0fbb1a538daa90b44fdaf2949a6a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -627,6 +627,7 @@ struct its_cmd_desc {
 			u8 sgi;
 			u8 priority;
 			bool enable;
+			bool nmi;
 			bool group;
 			bool clear;
 		} its_vsgi_cmd;
@@ -800,6 +801,11 @@ static void its_encode_sgi_priority(struct its_cmd_block *cmd, u8 prio)
 	its_mask_encode(&cmd->raw_cmd[0], prio >> 4, 23, 20);
 }
 
+static void its_encode_sgi_nmi(struct its_cmd_block *cmd, bool nmi)
+{
+	its_mask_encode(&cmd->raw_cmd[0], nmi, 11, 11);
+}
+
 static void its_encode_sgi_group(struct its_cmd_block *cmd, bool grp)
 {
 	its_mask_encode(&cmd->raw_cmd[0], grp, 10, 10);
@@ -1203,6 +1209,7 @@ static struct its_vpe *its_build_vsgi_cmd(struct its_node *its,
 	its_encode_sgi_intid(cmd, desc->its_vsgi_cmd.sgi);
 #endif
 	its_encode_sgi_priority(cmd, desc->its_vsgi_cmd.priority);
+	its_encode_sgi_nmi(cmd, desc->its_vsgi_cmd.nmi);
 	its_encode_sgi_group(cmd, desc->its_vsgi_cmd.group);
 	its_encode_sgi_clear(cmd, desc->its_vsgi_cmd.clear);
 	its_encode_sgi_enable(cmd, desc->its_vsgi_cmd.enable);
@@ -4495,6 +4502,7 @@ static void its_configure_sgi(struct irq_data *d, bool clear)
 	desc.its_vsgi_cmd.priority = vpe->sgi_config[d->hwirq].priority;
 	desc.its_vsgi_cmd.enable = vpe->sgi_config[d->hwirq].enabled;
 	desc.its_vsgi_cmd.group = vpe->sgi_config[d->hwirq].group;
+	desc.its_vsgi_cmd.nmi = vpe->sgi_config[d->hwirq].nmi;
 	desc.its_vsgi_cmd.clear = clear;
 
 	/*
@@ -4653,6 +4661,7 @@ static int its_sgi_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
 	case PROP_UPDATE_VSGI:
 		vpe->sgi_config[d->hwirq].priority = info->priority;
 		vpe->sgi_config[d->hwirq].group = info->group;
+		vpe->sgi_config[d->hwirq].nmi = info->nmi;
 		its_configure_sgi(d, false);
 		return 0;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index a9fda74a198f686768c94c99d2b0350ce0fdc42b..8bb42ac8fb2a28c92f2b852d5856d690bf9f0108 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -148,11 +148,21 @@ static inline bool has_v3_3_nmi(void)
 {
 	return gic_data.has_nmi && system_uses_nmi();
 }
+
+static bool system_is_nmi_capable(void)
+{
+	return gic_data.has_nmi && cpus_have_const_cap(ARM64_HAS_NMI);
+}
 #else
 static inline bool has_v3_3_nmi(void)
 {
 	return false;
 }
+
+static bool system_is_nmi_capable(void)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS
@@ -2330,6 +2340,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
 
 	gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
 	gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
+	gic_v3_kvm_info.has_nmi = system_is_nmi_capable();
 #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS
 	gic_v3_kvm_info.has_vtimer = gic_data.rdists.has_vtimer;
 #endif
@@ -2681,6 +2692,7 @@ static void __init gic_acpi_setup_kvm_info(void)
 
 	gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
 	gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
+	gic_v3_kvm_info.has_nmi = system_is_nmi_capable();
 #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS
 	gic_v3_kvm_info.has_vtimer = gic_data.rdists.has_vtimer;
 #endif
diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 22a9a7f1739fda9cad313d02079805d51a18308e..494001500261c1cdca5f1ad1fd46a5db3496ae0d 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -393,13 +393,14 @@ int its_prop_update_vlpi(int irq, u8 config, bool inv)
 	return irq_set_vcpu_affinity(irq, &info);
 }
 
-int its_prop_update_vsgi(int irq, u8 priority, bool group)
+int its_prop_update_vsgi(int irq, u8 priority, bool group, bool nmi)
 {
 	struct its_cmd_info info = {
 		.cmd_type = PROP_UPDATE_VSGI,
 		{
 			.priority = priority,
 			.group = group,
+			.nmi = nmi,
 		},
 	};
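On the ITS side the NMI property rides the VSGI command: the flag is encoded into bit 11 of the first command word, next to the group bit, and vgic_update_vsgi() sends priority 0 for an NMI since the field carries no meaning there. A sketch of the packing, with a simplified stand-in for its_mask_encode() (illustrative, not the kernel helper):

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified field packer: set bits [h:l] of *raw to val. */
    static void mask_encode(uint64_t *raw, uint64_t val, int h, int l)
    {
        uint64_t mask = (((h == 63) ? 0 : (UINT64_C(1) << (h + 1))) - 1) &
                        ~((UINT64_C(1) << l) - 1);

        *raw = (*raw & ~mask) | ((val << l) & mask);
    }

    static uint64_t build_vsgi_cmd(uint8_t prio, bool group, bool nmi)
    {
        uint64_t cmd = 0;

        mask_encode(&cmd, prio >> 4, 23, 20); /* its_encode_sgi_priority() */
        mask_encode(&cmd, nmi, 11, 11);       /* its_encode_sgi_nmi() */
        mask_encode(&cmd, group, 10, 10);     /* its_encode_sgi_group() */
        return cmd;
    }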
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 664d4a16970b5b727f80fb808810d339ea591492..0cbcf4e06761023c0b3394a20c88eb7aa063e7e1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -108,6 +108,9 @@ struct vgic_global {
 	bool			has_gicv4;
 	bool			has_gicv4_1;
 
+	/* NMI */
+	bool			has_nmi;
+
 	/* Pseudo GICv3 from outer space */
 	bool			no_hw_deactivation;
@@ -180,6 +183,7 @@ struct vgic_irq {
 	bool active;			/* not used for LPIs */
 	bool enabled;
 	bool hw;			/* Tied to HW IRQ */
+	bool nmi;			/* Configured as NMI */
 	struct kref refcount;		/* Used for LPIs */
 	u32 hwintid;			/* HW INTID number */
 	unsigned int host_irq;		/* linux irq corresponding to hwintid */
@@ -287,7 +291,8 @@ struct vgic_dist {
 	u32 implementation_rev;
 #define KVM_VGIC_IMP_REV_2	2 /* GICv2 restorable groups */
 #define KVM_VGIC_IMP_REV_3	3 /* GICv3 GICR_CTLR.{IW,CES,RWP} */
-#define KVM_VGIC_IMP_REV_LATEST	KVM_VGIC_IMP_REV_3
+#define KVM_VGIC_IMP_REV_4	4 /* GICv3 NMI */
+#define KVM_VGIC_IMP_REV_LATEST	KVM_VGIC_IMP_REV_4
 
 	/* Userspace can write to GICv2 IGROUPR */
 	bool v2_groups_user_writable;
@@ -320,6 +325,7 @@ struct vgic_dist {
 
 	struct vgic_io_device	dist_iodev;
 
+	bool			has_nmi;
 	bool			has_its;
 	bool			table_write_in_progress;
 
@@ -375,7 +381,7 @@ struct vgic_v3_cpu_if {
 	u32		vgic_vmcr;
 	u32		vgic_sre;	/* Restored only, change ignored */
 	u32		vgic_ap0r[4];
-	u32		vgic_ap1r[4];
+	u64		vgic_ap1r[4];
 	u64		vgic_lr[VGIC_V3_MAX_LRS];
 
 	/*
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 90b23d8ebab9ee4231d2b680262931e2fd5bc3e5..f489defdd6e1f14603658e160a61ca3955119c53 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -57,6 +57,7 @@ struct its_vpe {
 			u8 priority;
 			bool enabled;
 			bool group;
+			bool nmi;
 #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS
 		} sgi_config[32];
 		int nr_irqs;
@@ -129,6 +130,7 @@ struct its_cmd_info {
 		struct {
 			u8 priority;
 			bool group;
+			bool nmi;
 		};
 	};
 };
@@ -143,7 +145,7 @@ int its_map_vlpi(int irq, struct its_vlpi_map *map);
 int its_get_vlpi(int irq, struct its_vlpi_map *map);
 int its_unmap_vlpi(int irq);
 int its_prop_update_vlpi(int irq, u8 config, bool inv);
-int its_prop_update_vsgi(int irq, u8 priority, bool group);
+int its_prop_update_vsgi(int irq, u8 priority, bool group, bool nmi);
 
 struct irq_domain_ops;
 int its_init_v4(struct irq_domain *domain,
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
index ac362587d2fb9e581f45435d96af1c823d556925..aa2e9eaa9ad95f252731d2a0b512c1b409ee8192 100644
--- a/include/linux/irqchip/arm-vgic-info.h
+++ b/include/linux/irqchip/arm-vgic-info.h
@@ -32,6 +32,8 @@ struct gic_kvm_info {
 	bool		has_v4;
 	/* rvpeid support */
 	bool		has_v4_1;
+	/* NMI support */
+	bool		has_nmi;
 #ifdef CONFIG_VIRT_VTIMER_IRQ_BYPASS
 	/* vtimer irqbypass support */
 	bool		has_vtimer;