diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index f130774b720c37353f052595861b23d6911fb7af..97c5752eea1ddb609b9441eb0d05d17e063feb70 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -2380,6 +2380,7 @@ CONFIG_MISC_RTSX=m # CONFIG_HISI_HIKEY_USB is not set # CONFIG_OPEN_DICE is not set # CONFIG_VCPU_STALL_DETECTOR is not set +CONFIG_VIRT_PLAT_DEV=y # CONFIG_C2PORT is not set # diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3d6725ff0bf6d24577c84c6814fcc814b0681fb8..b498b8070347108385b7667a0ee5f457c96ee88e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -112,6 +112,7 @@ static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 &= ~HCR_TWE; if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) || + vcpu->kvm->arch.vgic.vtimer_irqbypass || vcpu->kvm->arch.vgic.nassgireq) vcpu->arch.hcr_el2 &= ~HCR_TWI; else diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index c0c050e53157d9908c91fd781aa1b5d3271e4092..2f303ff92c570996a8b004a763750659b737dec5 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -20,7 +20,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ - vgic/vgic-its.o vgic/vgic-debug.o + vgic/vgic-its.o vgic/shadow_dev.o vgic/vgic-debug.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index a1e24228aaaa764f3f96a3780e60a3003694b22b..e8aba14bcb7e39c1ae568ddb176b31f629512f7c 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -29,6 +29,19 @@ static unsigned int host_ptimer_irq; static u32 host_vtimer_irq_flags; static u32 host_ptimer_irq_flags; +bool vtimer_irqbypass; + +static int __init early_vtimer_irqbypass(char *buf) +{ + return strtobool(buf, &vtimer_irqbypass); +} +early_param("kvm-arm.vtimer_irqbypass", early_vtimer_irqbypass); + +static inline bool vtimer_is_irqbypass(void) +{ + return !!vtimer_irqbypass && kvm_vgic_vtimer_irqbypass_support(); +} + static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); static const u8 default_ppi[] = { @@ -653,6 +666,43 @@ static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, boo WARN_ON(r); } +static void kvm_vtimer_mbigen_auto_clr_set(struct kvm_vcpu *vcpu, bool set) +{ + BUG_ON(!vtimer_is_irqbypass()); + + vtimer_mbigen_set_auto_clr(vcpu->cpu, set); +} + +static void kvm_vtimer_gic_auto_clr_set(struct kvm_vcpu *vcpu, bool set) +{ + BUG_ON(!vtimer_is_irqbypass()); + + vtimer_gic_set_auto_clr(vcpu->cpu, set); +} + +static void kvm_vtimer_mbigen_restore_stat(struct kvm_vcpu *vcpu) +{ + struct vtimer_mbigen_context *mbigen_ctx = vcpu_vtimer_mbigen(vcpu); + u16 vpeid = kvm_vgic_get_vcpu_vpeid(vcpu); + unsigned long flags; + + WARN_ON(!vtimer_is_irqbypass()); + + local_irq_save(flags); + + if (mbigen_ctx->loaded) + goto out; + + vtimer_mbigen_set_vector(vcpu->cpu, vpeid); + + if (mbigen_ctx->active) + vtimer_mbigen_set_active(vcpu->cpu, true); + + mbigen_ctx->loaded = true; +out: + local_irq_restore(flags); +} + static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) { struct kvm_vcpu *vcpu = ctx->vcpu; @@ -834,20 +884,35 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); + if (vtimer_is_irqbypass()) { + 
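+		/*
+		 * In rough terms (a summary of the calls that follow): stop
+		 * the mbigen's HW auto-clear while the saved context is
+		 * replayed, point the mbigen at this vCPU's vPE and restore
+		 * the saved active bit, then skip the vGIC load of the
+		 * vtimer, whose state now lives entirely in HW.
+		 */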
kvm_vtimer_mbigen_auto_clr_set(vcpu, false); + kvm_vtimer_mbigen_restore_stat(vcpu); + + goto skip_load_vtimer; + } + if (static_branch_likely(&has_gic_active_state)) { if (vcpu_has_nv(vcpu)) kvm_timer_vcpu_load_nested_switch(vcpu, &map); kvm_timer_vcpu_load_gic(map.direct_vtimer); - if (map.direct_ptimer) - kvm_timer_vcpu_load_gic(map.direct_ptimer); } else { kvm_timer_vcpu_load_nogic(vcpu); } +skip_load_vtimer: + if (static_branch_likely(&has_gic_active_state) && map.direct_ptimer) + kvm_timer_vcpu_load_gic(map.direct_ptimer); + kvm_timer_unblocking(vcpu); timer_restore_state(map.direct_vtimer); + + if(vtimer_is_irqbypass()) { + kvm_vtimer_mbigen_auto_clr_set(vcpu, true); + kvm_vtimer_gic_auto_clr_set(vcpu, true); + } + if (map.direct_ptimer) timer_restore_state(map.direct_ptimer); if (map.emul_vtimer) @@ -875,6 +940,29 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) kvm_timer_should_fire(ptimer) != plevel; } +static void kvm_vtimer_mbigen_save_stat(struct kvm_vcpu *vcpu) +{ + struct vtimer_mbigen_context *mbigen_ctx = vcpu_vtimer_mbigen(vcpu); + unsigned long flags; + + WARN_ON(!vtimer_is_irqbypass()); + + local_irq_save(flags); + + if (!mbigen_ctx->loaded) + goto out; + + mbigen_ctx->active = vtimer_mbigen_get_active(vcpu->cpu); + + /* Clear active state in MBIGEN now that we've saved everything. */ + if (mbigen_ctx->active) + vtimer_mbigen_set_active(vcpu->cpu, false); + + mbigen_ctx->loaded = false; +out: + local_irq_restore(flags); +} + void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -885,7 +973,18 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); + if (vtimer_is_irqbypass()) { + kvm_vtimer_mbigen_auto_clr_set(vcpu, false); + kvm_vtimer_gic_auto_clr_set(vcpu, false); + } + timer_save_state(map.direct_vtimer); + + if (vtimer_is_irqbypass()) { + kvm_vtimer_mbigen_save_stat(vcpu); + kvm_vtimer_mbigen_auto_clr_set(vcpu, true); + } + if (map.direct_ptimer) timer_save_state(map.direct_ptimer); @@ -965,6 +1064,15 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) } if (timer->enabled) { + if (vtimer_is_irqbypass()) { + kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); + + if (irqchip_in_kernel(vcpu->kvm) && map.direct_ptimer) + kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer)); + + goto skip_reset_vtimer; + } + for (int i = 0; i < nr_timers(vcpu); i++) kvm_timer_update_irq(vcpu, false, vcpu_get_timer(vcpu, i)); @@ -978,6 +1086,8 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) if (map.emul_vtimer) soft_timer_cancel(&map.emul_vtimer->hrtimer); + +skip_reset_vtimer: if (map.emul_ptimer) soft_timer_cancel(&map.emul_ptimer->hrtimer); @@ -1036,6 +1146,11 @@ void kvm_timer_init_vm(struct kvm *kvm) void kvm_timer_cpu_up(void) { + if (vtimer_is_irqbypass()) { + enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); + return; + } + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); if (host_ptimer_irq) enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); @@ -1043,6 +1158,9 @@ void kvm_timer_cpu_up(void) void kvm_timer_cpu_down(void) { + if (vtimer_is_irqbypass()) + return; + disable_percpu_irq(host_vtimer_irq); if (host_ptimer_irq) disable_percpu_irq(host_ptimer_irq); @@ -1380,6 +1498,31 @@ int __init kvm_timer_hyp_init(bool has_gic) return -ENODEV; } + /* + * vtimer-irqbypass depends on: + * + * - HW support at mbigen level (vtimer_irqbypass_hw_support) + * - HW support at GIC level (kvm_vgic_vtimer_irqbypass_support) + * - in_kernel irqchip support + * - 
"kvm-arm.vtimer_irqbypass=1" + */ + vtimer_irqbypass &= vtimer_irqbypass_hw_support(info); + vtimer_irqbypass &= has_gic; + if (vtimer_is_irqbypass()) { + kvm_info("vtimer-irqbypass enabled\n"); + + /* + * If vtimer irqbypass is enabled, there's no need to use the + * vtimer forwarded irq inject. + */ + + if (info->physical_irq > 0) { + host_ptimer_irq = info->physical_irq; + kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags); + } + goto ptimer_irq_init; + } + err = kvm_irq_init(info); if (err) return err; @@ -1407,6 +1550,7 @@ int __init kvm_timer_hyp_init(bool has_gic) kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); +ptimer_irq_init: /* Now let's do the physical EL1 timer irq */ if (info->physical_irq > 0) { @@ -1506,15 +1650,81 @@ static bool kvm_arch_timer_get_input_level(int vintid) return false; } +static void vtimer_set_active_stat(struct kvm_vcpu *vcpu, int vintid, bool set) +{ + struct vtimer_mbigen_context *mbigen_ctx = vcpu_vtimer_mbigen(vcpu); + int hwirq = timer_irq(vcpu_vtimer(vcpu)); + + WARN_ON(!vtimer_is_irqbypass() || hwirq != vintid); + + if (!mbigen_ctx->loaded) + mbigen_ctx->active = set; + else + vtimer_mbigen_set_active(vcpu->cpu, set); +} + +static bool vtimer_get_active_stat(struct kvm_vcpu *vcpu, int vintid) +{ + struct vtimer_mbigen_context *mbigen_ctx = vcpu_vtimer_mbigen(vcpu); + int hwirq = timer_irq(vcpu_vtimer(vcpu)); + + WARN_ON(!vtimer_is_irqbypass() || hwirq != vintid); + + if (!mbigen_ctx->loaded) + return mbigen_ctx->active; + else + return vtimer_mbigen_get_active(vcpu->cpu); +} + +int kvm_vtimer_config(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int ret = 0; + unsigned long c; + + if (!vtimer_is_irqbypass()) + return 0; + + if (!irqchip_in_kernel(kvm)) + return -EINVAL; + mutex_lock(&kvm->lock); + if (dist->vtimer_irqbypass) + goto out; + + kvm_for_each_vcpu(c, vcpu, kvm) { + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + int intid; + + WARN_ON(timer->enabled); + + intid = timer_irq(vcpu_vtimer(vcpu)); + ret = kvm_vgic_config_vtimer_irqbypass(vcpu, intid, + vtimer_get_active_stat, + vtimer_set_active_stat); + if (ret) + goto out; + } + + dist->vtimer_irqbypass = true; + +out: + mutex_unlock(&kvm->lock); + return ret; +} + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); struct timer_map map; - int ret; + int ret = 0; if (timer->enabled) return 0; + if (!irqchip_in_kernel(vcpu->kvm) && vtimer_is_irqbypass()) + return -EINVAL; + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ if (!irqchip_in_kernel(vcpu->kvm)) goto no_vgic; @@ -1530,6 +1740,9 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) get_timer_map(vcpu, &map); + if (vtimer_is_irqbypass()) + goto skip_map_vtimer; + ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, timer_irq(map.direct_vtimer), @@ -1537,6 +1750,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (ret) return ret; +skip_map_vtimer: if (map.direct_ptimer) { ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4866b3f7b4ea3847d885e00cfac47a4d7abf9da3..fc7ff28641f31b764f18f36ccbee3dae505d4548 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -317,6 +317,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES: r = kvm_supported_block_sizes(); break; + case KVM_CAP_ARM_VIRT_MSI_BYPASS: + r = sdev_enable; + break; default: r = 0; } @@ -603,6 +606,10 @@ 
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) kvm_arm_vcpu_init_debug(vcpu); + ret = kvm_vtimer_config(kvm); + if (ret) + return ret; + if (likely(irqchip_in_kernel(kvm))) { /* * Map the VGIC hardware resources before running a vcpu the @@ -1629,6 +1636,36 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return kvm_vm_set_attr(kvm, &attr); } + + case KVM_CREATE_SHADOW_DEV: { + struct kvm_master_dev_info *mdi; + u32 nvectors; + int ret; + + if (get_user(nvectors, (const u32 __user *)argp)) + return -EFAULT; + if (!nvectors) + return -EINVAL; + + mdi = memdup_user(argp, sizeof(*mdi) + nvectors * sizeof(mdi->msi[0])); + if (IS_ERR(mdi)) + return PTR_ERR(mdi); + + ret = kvm_shadow_dev_create(kvm, mdi); + kfree(mdi); + + return ret; + } + case KVM_DEL_SHADOW_DEV: { + u32 devid; + + if (get_user(devid, (const u32 __user *)argp)) + return -EFAULT; + + kvm_shadow_dev_delete(kvm, devid); + return 0; + } + default: return -EINVAL; } @@ -2393,6 +2430,11 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) kvm_arm_resume_guest(irqfd->kvm); } +void kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ + kvm_shadow_dev_delete_all(kvm); +} + /* Initialize Hyp-mode and memory mappings on all CPUs */ static __init int kvm_arm_init(void) { @@ -2470,6 +2512,8 @@ static __init int kvm_arm_init(void) kvm_arm_initialised = true; + kvm_shadow_dev_init(); + return 0; out_subs: diff --git a/arch/arm64/kvm/vgic/shadow_dev.c b/arch/arm64/kvm/vgic/shadow_dev.c new file mode 100644 index 0000000000000000000000000000000000000000..3b1210954a944583a354a4b07f3c735ee1f70c34 --- /dev/null +++ b/arch/arm64/kvm/vgic/shadow_dev.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2019-2020 HUAWEI TECHNOLOGIES CO., LTD., All Rights Reserved. 
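+ *
+ * A shadow device is a host-side "virt_plat_dev" platform device whose
+ * MSI vectors get forwarded to the guest as vLPIs (GICv4 direct
+ * injection). Roughly:
+ *
+ *   KVM_CREATE_SHADOW_DEV
+ *     -> sdev_virt_pdev_add()           allocate the device + MSIs
+ *     -> kvm_vgic_v4_set_forwarding()   wire each vector to a vLPI
+ *
+ * after which an irqfd write ends up in shadow_dev_virq_bypass_inject(),
+ * which only pokes IRQCHIP_STATE_PENDING on the host IRQ.
+ *
+ * Sketch of the expected userspace flow (field names as in this series):
+ *
+ *   if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VIRT_MSI_BYPASS) > 0) {
+ *           mdi->nvectors = n;          // plus mdi->msi[0..n-1]
+ *           ioctl(vm_fd, KVM_CREATE_SHADOW_DEV, mdi);
+ *           ...
+ *           ioctl(vm_fd, KVM_DEL_SHADOW_DEV, &devid);
+ *   }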
+ * Author: Wanghaibin
+ */
+
+#include <linux/bitmap.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/platform_device.h>
+#include <linux/workqueue.h>
+#include <kvm/arm_vgic.h>
+
+static struct workqueue_struct *sdev_cleanup_wq;
+static bool virt_msi_bypass;
+bool sdev_enable;
+
+static void shadow_dev_destroy(struct work_struct *work);
+static void sdev_virt_pdev_delete(struct platform_device *pdev);
+
+int shadow_dev_virq_bypass_inject(struct kvm *kvm,
+				  struct kvm_kernel_irq_routing_entry *e)
+{
+	struct shadow_dev *sdev = e->cache.data;
+	u32 vec = e->msi.data;
+	u32 host_irq = sdev->host_irq[vec];
+	int ret;
+
+	ret = irq_set_irqchip_state(host_irq, IRQCHIP_STATE_PENDING, true);
+	WARN_RATELIMIT(ret, "IRQ %d", host_irq);
+
+	return ret;
+}
+
+/* Must be called with the dist->sdev_list_lock held */
+struct shadow_dev *kvm_shadow_dev_get(struct kvm *kvm, struct kvm_msi *msi)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct shadow_dev *sdev;
+
+	if (!sdev_enable)
+		return NULL;
+
+	list_for_each_entry(sdev, &dist->sdev_list_head, entry) {
+		if (sdev->devid != msi->devid)
+			continue;
+
+		if (sdev->nvecs <= msi->data ||
+		    !test_bit(msi->data, sdev->enable))
+			break;
+
+		return sdev;
+	}
+
+	return NULL;
+}
+
+static struct platform_device *sdev_virt_pdev_add(u32 nvec)
+{
+	struct platform_device *virtdev;
+	int ret = -ENOMEM;
+
+	virtdev = platform_device_alloc("virt_plat_dev", PLATFORM_DEVID_AUTO);
+	if (!virtdev) {
+		kvm_err("Failed to allocate virtual platform device\n");
+		goto out;
+	}
+
+	dev_set_drvdata(&virtdev->dev, &nvec);
+
+	ret = platform_device_add(virtdev);
+	if (ret) {
+		kvm_err("Failed to add virtual platform device (%d)\n", ret);
+		goto put_device;
+	}
+
+	return virtdev;
+
+put_device:
+	platform_device_put(virtdev);
+out:
+	return ERR_PTR(ret);
+}
+
+static void sdev_set_irq_entry(struct shadow_dev *sdev,
+			       struct kvm_kernel_irq_routing_entry *irq_entries)
+{
+	int i;
+
+	for (i = 0; i < sdev->nvecs; i++) {
+		irq_entries[i].msi.address_lo = sdev->msi[i].address_lo;
+		irq_entries[i].msi.address_hi = sdev->msi[i].address_hi;
+		irq_entries[i].msi.data = sdev->msi[i].data;
+		irq_entries[i].msi.flags = sdev->msi[i].flags;
+		irq_entries[i].msi.devid = sdev->msi[i].devid;
+	}
+}
+
+static int sdev_virq_bypass_active(struct kvm *kvm, struct shadow_dev *sdev)
+{
+	struct kvm_kernel_irq_routing_entry *irq_entries;
+	struct msi_desc *desc;
+	u32 vec = 0;
+
+	sdev->host_irq = kcalloc(sdev->nvecs, sizeof(int), GFP_KERNEL);
+	sdev->enable = bitmap_zalloc(sdev->nvecs, GFP_KERNEL);
+	irq_entries = kcalloc(sdev->nvecs,
+			      sizeof(struct kvm_kernel_irq_routing_entry),
+			      GFP_KERNEL);
+
+	if (!irq_entries || !sdev->enable || !sdev->host_irq) {
+		kfree(sdev->host_irq);
+		kfree(sdev->enable);
+		kfree(irq_entries);
+		return -ENOMEM;
+	}
+
+	sdev_set_irq_entry(sdev, irq_entries);
+
+	msi_for_each_desc(desc, &sdev->pdev->dev, MSI_DESC_ALL) {
+		if (!kvm_vgic_v4_set_forwarding(kvm, desc->irq,
+						&irq_entries[vec])) {
+			set_bit(vec, sdev->enable);
+			sdev->host_irq[vec] = desc->irq;
+		} else {
+			/*
+			 * Cannot use the shadow device for direct injection,
+			 * though not fatal...
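+			 * If forwarding fails, the vector just stays on the
+			 * software path: its enable bit is left clear, so
+			 * kvm_shadow_dev_get() ignores it and injection
+			 * falls back to the normal ITS translation
+			 * (vgic_its_inject_msi() and friends).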
+ */ + kvm_err("Shadow device set (%d) forwarding failed", + desc->irq); + } + vec++; + } + + kfree(irq_entries); + return 0; +} + +static void sdev_msi_entry_init(struct kvm_master_dev_info *mdi, + struct shadow_dev *sdev) +{ + int i; + + for (i = 0; i < sdev->nvecs; i++) { + sdev->msi[i].address_lo = mdi->msi[i].address_lo; + sdev->msi[i].address_hi = mdi->msi[i].address_hi; + sdev->msi[i].data = mdi->msi[i].data; + sdev->msi[i].flags = mdi->msi[i].flags; + sdev->msi[i].devid = mdi->msi[i].devid; + } +} + +int kvm_shadow_dev_create(struct kvm *kvm, struct kvm_master_dev_info *mdi) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct shadow_dev *sdev; + struct kvm_msi *msi; + unsigned long flags; + int ret; + + if (WARN_ON(!sdev_enable)) + return -EINVAL; + + ret = -ENOMEM; + sdev = kzalloc(sizeof(struct shadow_dev), GFP_KERNEL); + if (!sdev) + return ret; + + sdev->nvecs = mdi->nvectors; + + msi = kcalloc(sdev->nvecs, sizeof(struct kvm_msi), GFP_KERNEL); + if (!msi) + goto free_sdev; + + sdev->msi = msi; + sdev_msi_entry_init(mdi, sdev); + sdev->devid = sdev->msi[0].devid; + + sdev->pdev = sdev_virt_pdev_add(sdev->nvecs); + if (IS_ERR(sdev->pdev)) { + ret = PTR_ERR(sdev->pdev); + goto free_sdev_msi; + } + + ret = sdev_virq_bypass_active(kvm, sdev); + if (ret) + goto delete_virtdev; + + sdev->kvm = kvm; + INIT_WORK(&sdev->destroy, shadow_dev_destroy); + + raw_spin_lock_irqsave(&dist->sdev_list_lock, flags); + list_add_tail(&sdev->entry, &dist->sdev_list_head); + raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags); + + kvm_info("Create shadow device: 0x%x\n", sdev->devid); + return ret; + +delete_virtdev: + sdev_virt_pdev_delete(sdev->pdev); +free_sdev_msi: + kfree(sdev->msi); +free_sdev: + kfree(sdev); + return ret; +} + +static void sdev_virt_pdev_delete(struct platform_device *pdev) +{ + platform_device_unregister(pdev); +} + +static void sdev_virq_bypass_deactive(struct kvm *kvm, struct shadow_dev *sdev) +{ + struct kvm_kernel_irq_routing_entry *irq_entries; + struct msi_desc *desc; + u32 vec = 0; + + irq_entries = kcalloc(sdev->nvecs, + sizeof(struct kvm_kernel_irq_routing_entry), + GFP_KERNEL); + if (!irq_entries) + return; + + sdev_set_irq_entry(sdev, irq_entries); + + msi_for_each_desc(desc, &sdev->pdev->dev, MSI_DESC_ALL) { + if (!kvm_vgic_v4_unset_forwarding(kvm, desc->irq, + &irq_entries[vec])) { + clear_bit(vec, sdev->enable); + sdev->host_irq[vec] = 0; + } else { + kvm_err("Shadow device unset (%d) forwarding failed", + desc->irq); + } + vec++; + } + + kfree(sdev->host_irq); + kfree(sdev->enable); + kfree(irq_entries); + + /* FIXME: no error handling */ +} + +static void shadow_dev_destroy(struct work_struct *work) +{ + struct shadow_dev *sdev = container_of(work, struct shadow_dev, destroy); + struct kvm *kvm = sdev->kvm; + + sdev_virq_bypass_deactive(kvm, sdev); + sdev_virt_pdev_delete(sdev->pdev); + + sdev->nvecs = 0; + kfree(sdev->msi); + kfree(sdev); +} + +void kvm_shadow_dev_delete(struct kvm *kvm, u32 devid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct shadow_dev *sdev, *tmp; + unsigned long flags; + + if (WARN_ON(!sdev_enable)) + return; + + raw_spin_lock_irqsave(&dist->sdev_list_lock, flags); + WARN_ON(list_empty(&dist->sdev_list_head)); /* shouldn't be invoked */ + + list_for_each_entry_safe(sdev, tmp, &dist->sdev_list_head, entry) { + if (sdev->devid != devid) + continue; + + list_del(&sdev->entry); + queue_work(sdev_cleanup_wq, &sdev->destroy); + break; + } + raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags); + + 
flush_workqueue(sdev_cleanup_wq); +} + +void kvm_shadow_dev_delete_all(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct shadow_dev *sdev, *tmp; + unsigned long flags; + + if (!sdev_enable) + return; + + raw_spin_lock_irqsave(&dist->sdev_list_lock, flags); + + list_for_each_entry_safe(sdev, tmp, &dist->sdev_list_head, entry) { + list_del(&sdev->entry); + queue_work(sdev_cleanup_wq, &sdev->destroy); + } + + raw_spin_unlock_irqrestore(&dist->sdev_list_lock, flags); + + flush_workqueue(sdev_cleanup_wq); +} + +static int __init early_virt_msi_bypass(char *buf) +{ + return strtobool(buf, &virt_msi_bypass); +} +early_param("kvm-arm.virt_msi_bypass", early_virt_msi_bypass); + +void kvm_shadow_dev_init(void) +{ + /* + * FIXME: Ideally shadow device should only rely on a GICv4.0 + * capable ITS, but we should also take the reserved device ID + * pools into account. + */ + sdev_enable = kvm_vgic_global_state.has_gicv4 && virt_msi_bypass; + + sdev_cleanup_wq = alloc_workqueue("kvm-sdev-cleanup", 0, 0); + if (!sdev_cleanup_wq) + sdev_enable = false; + + kvm_info("Shadow device %sabled\n", sdev_enable ? "en" : "dis"); +} diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 07aa0437125a60b318f626abb556e35f635c11bc..20fed597531b09d6db7af08a578e7dd3ad29fb6c 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -193,7 +193,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, print_header(s, irq, vcpu); pending = irq->pending_latch; - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (vgic_direct_sgi_or_ppi(irq)) { int err; err = irq_get_irqchip_state(irq->host_irq, diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index c8c3cb812783218e93065d157291d09c7a4a0812..5d727e5eec1620300765e0f0a0258745ce6d2b60 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -56,6 +56,9 @@ void kvm_vgic_early_init(struct kvm *kvm) INIT_LIST_HEAD(&dist->lpi_list_head); INIT_LIST_HEAD(&dist->lpi_translation_cache); raw_spin_lock_init(&dist->lpi_list_lock); + + INIT_LIST_HEAD(&dist->sdev_list_head); + raw_spin_lock_init(&dist->sdev_list_lock); } /* CREATION */ @@ -225,6 +228,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) /* PPIs */ irq->config = VGIC_CONFIG_LEVEL; } + + /* Needed? 
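+		 * Strictly speaking, no: the vcpu (and with it the whole
+		 * vgic_cpu) is zero-allocated, so this mostly documents
+		 * that a PPI has no vtimer bypass attached until
+		 * kvm_vgic_config_vtimer_irqbypass() installs one.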
+		 */
+		irq->vtimer_info = NULL;
 	}
 
 	if (!irqchip_in_kernel(vcpu->kvm))
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index 475059bacedf892f33c1ce8287c0a1f9dcd91a1a..78f508408ac9037743ce15c8a2ac055fe8bf9a3a 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -9,6 +9,27 @@
 #include <kvm/arm_vgic.h>
 #include "vgic.h"
 
+static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e,
+			     struct kvm_msi *msi);
+
+void kire_arch_cached_data_update(struct kvm *kvm,
+				  struct kvm_kernel_irq_routing_entry *e)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kire_data *cache = &e->cache;
+	struct shadow_dev *sdev;
+	struct kvm_msi msi;
+
+	kvm_populate_msi(e, &msi);
+
+	raw_spin_lock(&dist->sdev_list_lock);
+	sdev = kvm_shadow_dev_get(kvm, &msi);
+	raw_spin_unlock(&dist->sdev_list_lock);
+
+	cache->valid = !!sdev;
+	cache->data = sdev;
+}
+
 /**
  * vgic_irqfd_set_irq: inject the IRQ corresponding to the
  * irqchip routing entry
@@ -98,6 +119,21 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return vgic_its_inject_msi(kvm, &msi);
 }
 
+static int kvm_arch_set_irq_bypass(struct kvm_kernel_irq_routing_entry *e,
+				   struct kvm *kvm)
+{
+	struct kire_data *cache = &e->cache;
+
+	/*
+	 * FIXME: is there a race against irqfd_update(), which is
+	 * where the cached data gets refreshed?
+	 */
+	if (!cache->valid)
+		return -EWOULDBLOCK;
+
+	return shadow_dev_virq_bypass_inject(kvm, e);
+}
+
 /**
  * kvm_arch_set_irq_inatomic: fast-path for irqfd injection
  */
@@ -115,6 +151,9 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
 		if (!vgic_has_its(kvm))
 			break;
 
+		if (!kvm_arch_set_irq_bypass(e, kvm))
+			return 0;
+
 		kvm_populate_msi(e, &msi);
 		return vgic_its_inject_cached_translation(kvm, &msi);
 	}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 188d2187eede935e43b31fcf4985205a1872d92f..36b50a9d64db50197d674176043dae42317909e3 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -374,6 +374,19 @@
 		irq->pending_latch = true;
 		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
 	} else {
+		/*
+		 * Workaround: on reset, userspace clears the pending state
+		 * of all PPIs and SGIs by writing all 0's to GICR_ISPENDR0.
+		 * The vtimer interrupt's pending state, however, lives in
+		 * the redistributor, and has to be cleared there
+		 * explicitly...
+		 *
+		 * P.S., irq->vtimer_info is NULL on restore.
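+		 * The sequence, roughly:
+		 *
+		 *   guest reset -> userspace writes 0 to GICR_ISPENDR0
+		 *     -> vgic_v3_uaccess_write_pending() (here)
+		 *       -> irq_set_irqchip_state(host_irq,
+		 *                IRQCHIP_STATE_PENDING, false)
+		 *
+		 * which clears the HW vPPI pending bit as well.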
+ */ + if (irq->vtimer_info) + WARN_ON_ONCE(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + false)); irq->pending_latch = false; raw_spin_unlock_irqrestore(&irq->irq_lock, flags); } diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index ff558c05e990c728abd5361054cb04ef44083818..52505976adb2955e295e58585d2e84859a82d698 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -78,7 +78,7 @@ void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->group = !!(val & BIT(i)); - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (vgic_direct_sgi_or_ppi(irq)) { vgic_update_vsgi(irq); raw_spin_unlock_irqrestore(&irq->irq_lock, flags); } else { @@ -125,7 +125,7 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (vgic_direct_sgi_or_ppi(irq)) { if (!irq->enabled) { struct irq_data *data; @@ -174,7 +174,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled) + if (vgic_direct_sgi_or_ppi(irq) && irq->enabled) disable_irq_nosync(irq->host_irq); irq->enabled = false; @@ -250,7 +250,7 @@ static unsigned long __read_pending(struct kvm_vcpu *vcpu, * for handling of ISPENDR and ICPENDR. */ raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (vgic_direct_sgi_or_ppi(irq)) { int err; val = false; @@ -320,7 +320,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (vgic_direct_sgi_or_ppi(irq)) { /* HW SGI? Ask the GIC to inject it */ int err; err = irq_set_irqchip_state(irq->host_irq, @@ -413,7 +413,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, raw_spin_lock_irqsave(&irq->irq_lock, flags); - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (vgic_direct_sgi_or_ppi(irq)) { /* HW SGI? Ask the GIC to clear its pending bit */ int err; err = irq_set_irqchip_state(irq->host_irq, @@ -510,12 +510,17 @@ static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vtimer_info *vtimer = irq->vtimer_info; + bool state = irq->active; + + if (vtimer) + state = vtimer->get_active_stat(vcpu, irq->intid); /* * Even for HW interrupts, don't evaluate the HW state as * all the guest is interested in is the virtual state. */ - if (irq->active) + if (state) value |= (1U << i); vgic_put_irq(vcpu->kvm, irq); @@ -575,6 +580,9 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, * do here. 
		 */
		irq->active = false;
+	} else if (irq->vtimer_info) {
+		/* MMIO trap only */
+		irq->vtimer_info->set_active_stat(vcpu, irq->intid, active);
	} else {
		u32 model = vcpu->kvm->arch.vgic.vgic_model;
		u8 active_source;
@@ -718,7 +726,7 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
 		raw_spin_lock_irqsave(&irq->irq_lock, flags);
 		/* Narrow the priority range to what we actually support */
 		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
-		if (irq->hw && vgic_irq_is_sgi(irq->intid))
+		if (vgic_direct_sgi_or_ppi(irq))
 			vgic_update_vsgi(irq);
 
 		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 3dfc8b84e03e67868ff49cb72a97695a9222ef2b..f390efd51c5eea3ab73dafc792edb8b0f589f70c 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -571,6 +571,9 @@ int vgic_v3_map_resources(struct kvm *kvm)
 	if (kvm_vgic_global_state.has_gicv4_1)
 		vgic_v4_configure_vsgis(kvm);
 
+	if (kvm_vgic_vtimer_irqbypass_support())
+		vgic_v4_configure_vtimer(kvm);
+
 	return 0;
 }
 
@@ -653,6 +656,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
 		kvm_info("GICv4%s support %sabled\n",
 			 kvm_vgic_global_state.has_gicv4_1 ? ".1" : "",
 			 gicv4_enable ? "en" : "dis");
+
+		kvm_vgic_global_state.has_direct_vtimer = info->has_vtimer && gicv4_enable;
+		if (kvm_vgic_global_state.has_direct_vtimer)
+			kvm_info("vtimer-irqbypass support enabled at GIC level\n");
 	}
 
 	kvm_vgic_global_state.vcpu_base = 0;
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index 339a55194b2c63e78a6c8083fe7acb34a3cfa5af..597ef0a4169478bc1995b44ec72feb7103f2e57d 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -204,6 +204,63 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
 	kvm_arm_resume_guest(kvm);
 }
 
+static void vgic_v4_enable_vtimer(struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vtimer_info *vtimer = &vgic_cpu->vtimer;
+	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+	struct vgic_irq *irq;
+	struct irq_desc *desc;
+	int ret;
+
+	irq = vgic_get_irq(vcpu->kvm, vcpu, vtimer->intid);
+	irq->host_irq = irq_find_mapping(vpe->sgi_domain, vtimer->intid);
+
+	/* Transfer the full irq state to the vPE */
+	vgic_v4_sync_sgi_config(vpe, irq);
+	desc = irq_to_desc(irq->host_irq);
+	ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc),
+				      false);
+	if (!WARN_ON(ret)) {
+		/* Transfer pending state */
+		ret = irq_set_irqchip_state(irq->host_irq,
+					    IRQCHIP_STATE_PENDING,
+					    irq->pending_latch);
+		WARN_ON(ret);
+		irq->pending_latch = false;
+
+		/* Transfer active state */
+		vtimer->set_active_stat(vcpu, irq->intid, irq->active);
+		irq->active = false;
+	}
+
+	vgic_put_irq(vcpu->kvm, irq);
+}
+
+/* Must be called with the kvm lock held */
+void vgic_v4_configure_vtimer(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	if (!dist->vtimer_irqbypass)
+		return;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		vgic_v4_enable_vtimer(vcpu);
+}
+
+/**
+ * kvm_vgic_get_vcpu_vpeid - Get the vPE ID of a VCPU
+ * @vcpu: the VCPU in question
+ *
+ * The vtimer mbigen must be told the vPE ID of the VCPU that currently
+ * resides on this physical CPU.
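+ *
+ * For example, kvm_vtimer_mbigen_restore_stat() uses it as:
+ *
+ *   vtimer_mbigen_set_vector(vcpu->cpu, kvm_vgic_get_vcpu_vpeid(vcpu));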
+ */ +u16 kvm_vgic_get_vcpu_vpeid(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vpe_id; +} + /* * Must be called with GICv4.1 and the vPE unmapped, which * indicates the invalidation of any VPT caches associated diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 8be4c1ebdec27a3e13c12914af80227c8d0eb32b..8b70d87e77864e3996aa76fc32e92e451cceeaf4 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -590,6 +590,31 @@ int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) return ret; } +int kvm_vgic_config_vtimer_irqbypass(struct kvm_vcpu *vcpu, u32 vintid, + bool (*get_as)(struct kvm_vcpu *, int), + void (*set_as)(struct kvm_vcpu *, int, bool)) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vtimer_info *vtimer = &vgic_cpu->vtimer; + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + + if (WARN_ON_ONCE(!irq || !kvm_vgic_vtimer_irqbypass_support())) + return -EINVAL; + + vtimer->intid = vintid; + vtimer->get_active_stat = get_as; + vtimer->set_active_stat = set_as; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->vtimer_info = vtimer; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return 0; +} + + /** * kvm_vgic_set_owner - Set the owner of an interrupt for a VM * diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0ab09b0d44404b7f6d7a3bee5689b0dccadaf1a1..ae08a15d5b7e17382082258920d84823fa81bd1a 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -145,6 +145,16 @@ static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } +static inline bool vgic_direct_sgi_or_ppi(struct vgic_irq *irq) +{ + bool direct_sgi, direct_ppi; + + direct_sgi = irq->hw && vgic_irq_is_sgi(irq->intid); + direct_ppi = !!(irq->vtimer_info); + + return direct_sgi || direct_ppi; +} + /* * This struct provides an intermediate representation of the fields contained * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC @@ -341,5 +351,6 @@ void vgic_v4_teardown(struct kvm *kvm); void vgic_v4_configure_vsgis(struct kvm *kvm); void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq); +void vgic_v4_configure_vtimer(struct kvm *kvm); #endif diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c index daa6d5053bc3531249df3a2f1fedfa91608167bd..d2b5fa4c70cdb89dc2a7315c9bd5911e4c450f17 100644 --- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c @@ -10,6 +10,17 @@ #include #include +static struct irq_domain *vp_irq_domain; + +struct irq_domain *vp_get_irq_domain(void) +{ + if (!vp_irq_domain) + pr_err("virtual platform irqdomain hasn't be initialized!\n"); + + return vp_irq_domain; +} +EXPORT_SYMBOL_GPL(vp_get_irq_domain); + static struct irq_chip its_pmsi_irq_chip = { .name = "ITS-pMSI", }; @@ -43,6 +54,8 @@ int __weak iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id) return -1; } +extern bool rsv_devid_pool_cap; + static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *info) { @@ -52,6 +65,17 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev, msi_info = msi_get_domain_info(domain->parent); + if (rsv_devid_pool_cap && !dev->of_node && !dev->fwnode) { + WARN_ON_ONCE(domain != vp_irq_domain); + /* + * virtual platform device 
doesn't have a DeviceID which + * will be allocated with core ITS's help. + */ + info->scratchpad[0].ul = -1; + + goto vdev_pmsi_prepare; + } + if (dev->of_node) ret = of_pmsi_get_dev_id(domain, dev, &dev_id); else @@ -62,8 +86,10 @@ static int its_pmsi_prepare(struct irq_domain *domain, struct device *dev, /* ITS specific DeviceID, as the core ITS ignores dev. */ info->scratchpad[0].ul = dev_id; +vdev_pmsi_prepare: /* Allocate at least 32 MSIs, and always as a power of 2 */ nvec = max_t(int, 32, roundup_pow_of_two(nvec)); + return msi_info->ops->msi_prepare(domain->parent, dev, nvec, info); } @@ -86,7 +112,7 @@ static const struct of_device_id its_device_id[] = { static int __init its_pmsi_init_one(struct fwnode_handle *fwnode, const char *name) { - struct irq_domain *parent; + struct irq_domain *pmsi_irqdomain, *parent; parent = irq_find_matching_fwnode(fwnode, DOMAIN_BUS_NEXUS); if (!parent || !msi_get_domain_info(parent)) { @@ -94,13 +120,20 @@ static int __init its_pmsi_init_one(struct fwnode_handle *fwnode, return -ENXIO; } - if (!platform_msi_create_irq_domain(fwnode, &its_pmsi_domain_info, - parent)) { + pmsi_irqdomain = platform_msi_create_irq_domain(fwnode, + &its_pmsi_domain_info, + parent); + if (!pmsi_irqdomain) { pr_err("%s: unable to create platform domain\n", name); return -ENXIO; } pr_info("Platform MSI: %s domain created\n", name); + + /* Should we take other irqdomains into account? */ + if (!vp_irq_domain) + vp_irq_domain = pmsi_irqdomain; + return 0; } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9a7a74239eabb7cd3d2a3a077316d54833ec18cb..a5d0800c7ff08940f35a4bec449ab0e8641b5590 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -33,12 +33,107 @@ #include #include #include +#include #include #include #include "irq-gic-common.h" +/* a reserved bus id region */ +struct plat_rsv_buses { + u8 start; /* the first reserved bus id */ + u8 count; +}; + +/* + * Build a devid pool per reserved bus id region, where all + * device ids should be unused by physical PCI devices. + */ +struct rsv_devid_pool { + struct list_head entry; + + struct plat_rsv_buses buses; + u32 start; + u32 end; + + raw_spinlock_t devid_bm_lock; + unsigned long *devid_bm; +}; + +static LIST_HEAD(rsv_devid_pools); +static DEFINE_RAW_SPINLOCK(rsv_devid_pools_lock); + +/* Do we have usable rsv_devid_pool? Initialized to be true. */ +bool rsv_devid_pool_cap = true; +static u8 rsv_buses_start, rsv_buses_count; + +static int __init rsv_buses_start_cfg(char *buf) +{ + return kstrtou8(buf, 0, &rsv_buses_start); +} +early_param("irqchip.gicv3_rsv_buses_start", rsv_buses_start_cfg); + +static int __init rsv_buses_count_cfg(char *buf) +{ + return kstrtou8(buf, 0, &rsv_buses_count); +} +early_param("irqchip.gicv3_rsv_buses_count", rsv_buses_count_cfg); + +static void get_rsv_buses_resource(struct plat_rsv_buses *buses) +{ + buses->start = rsv_buses_start; + buses->count = rsv_buses_count; + + /* + * FIXME: There is no architectural way to get the *correct* + * reserved bus id info. + * + * The first thought is to increase the GITS_TYPER.Devbits for + * the usage for virtualization, but this will break all + * command layouts with DeviceID as an argument (e.g., INT). + * + * The second way is to decrease the GITS_TYPER.Devids so that + * SW can pick the unused device IDs for use (these IDs should + * actually be supported at HW level, though not exposed). + * *Or* fetch the information with the help of firmware. 
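+ * Right now the region simply comes from the command line, e.g.
+ * (example values only):
+ *
+ *   irqchip.gicv3_rsv_buses_start=0x80 irqchip.gicv3_rsv_buses_count=4
+ *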
They + * are essentially the same way. + */ +} + +static int probe_devid_pool_one(void) +{ + struct rsv_devid_pool *devid_pool; + + devid_pool = kzalloc(sizeof(*devid_pool), GFP_KERNEL); + if (!devid_pool) + return -ENOMEM; + + get_rsv_buses_resource(&devid_pool->buses); + raw_spin_lock_init(&devid_pool->devid_bm_lock); + + devid_pool->start = PCI_DEVID(devid_pool->buses.start, 0); + devid_pool->end = PCI_DEVID(devid_pool->buses.start + devid_pool->buses.count, 0); + + if (devid_pool->end == devid_pool->start) { + kfree(devid_pool); + return -EINVAL; + } + + devid_pool->devid_bm = bitmap_zalloc(devid_pool->end - devid_pool->start, + GFP_KERNEL); + if (!devid_pool->devid_bm) { + kfree(devid_pool); + return -ENOMEM; + } + + raw_spin_lock(&rsv_devid_pools_lock); + list_add(&devid_pool->entry, &rsv_devid_pools); + raw_spin_unlock(&rsv_devid_pools_lock); + + return 0; +} + #define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0) #define ITS_FLAGS_WORKAROUND_CAVIUM_22375 (1ULL << 1) #define ITS_FLAGS_WORKAROUND_CAVIUM_23144 (1ULL << 2) @@ -116,6 +211,7 @@ struct its_node { int numa_node; unsigned int msi_domain_flags; u32 pre_its_base; /* for Socionext Synquacer */ + u32 version; int vlpi_redist_offset; }; @@ -123,6 +219,23 @@ struct its_node { #define is_v4_1(its) (!!((its)->typer & GITS_TYPER_VMAPP)) #define device_ids(its) (FIELD_GET(GITS_TYPER_DEVBITS, (its)->typer) + 1) +#define is_vtimer_irqbypass(its) (!!((its)->version & GITS_VERSION_VTIMER)) + +/* Fetch it from gtdt->virtual_timer_interrupt. */ +#define is_vtimer_irq(irq) ((irq) == 27) + +static inline bool is_its_vsgi_cmd_valid(struct its_node *its, u8 hwirq) +{ + if (__get_intid_range(hwirq) == SGI_RANGE) + return true; + + /* For PPI range, only vtimer interrupt is supported atm. */ + if (is_vtimer_irq(hwirq) && is_vtimer_irqbypass(its)) + return true; + + return false; +} + #define ITS_ITT_ALIGN SZ_256 /* The maximum number of VPEID bits supported by VLPI commands */ @@ -166,6 +279,10 @@ struct its_device { u32 nr_ites; u32 device_id; bool shared; + + /* For virtual devices which needed the devid managed */ + bool is_vdev; + struct rsv_devid_pool *devid_pool; }; static struct { @@ -193,11 +310,77 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock); static DEFINE_IDA(its_vpeid_ida); +static void free_devid_to_rsv_pools(struct its_device *its_dev) +{ + struct rsv_devid_pool *pool = its_dev->devid_pool; + u32 id, size; + + WARN_ON(!pool); + + id = its_dev->device_id - pool->start; + size = pool->end - pool->start; + WARN_ON(id >= size); + + raw_spin_lock(&pool->devid_bm_lock); + clear_bit(id, pool->devid_bm); + raw_spin_unlock(&pool->devid_bm_lock); + + pr_debug("ITS: free devid (%u) to rsv_devid_pools\n", its_dev->device_id); +} + +static int alloc_devid_from_rsv_pools(struct rsv_devid_pool **devid_pool, + u32 *dev_id) +{ + struct rsv_devid_pool *pool; + int err = -ENOSPC; + + raw_spin_lock(&rsv_devid_pools_lock); + list_for_each_entry(pool, &rsv_devid_pools, entry) { + u32 size, id; + + size = pool->end - pool->start; + + raw_spin_lock(&pool->devid_bm_lock); + id = find_first_zero_bit(pool->devid_bm, size); + if (id >= size) { + /* No usable device id in this pool, try next. 
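+			 * (Today build_devid_pools() only ever creates one
+			 * pool, so "next" just means the allocation fails
+			 * with -ENOSPC.)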
*/ + raw_spin_unlock(&pool->devid_bm_lock); + continue; + } + + *dev_id = pool->start + id; + set_bit(id, pool->devid_bm); + raw_spin_unlock(&pool->devid_bm_lock); + + *devid_pool = pool; + err = 0; + break; + } + raw_spin_unlock(&rsv_devid_pools_lock); + + pr_debug("ITS: alloc devid (%u) from rsv_devid_pools\n", *dev_id); + return err; +} + #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) #define gic_data_rdist_cpu(cpu) (per_cpu_ptr(gic_rdists->rdist, cpu)) #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) #define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K) +/* + * Currently we only build *one* devid pool. + */ +static int build_devid_pools(void) +{ + struct its_node *its; + + its = list_first_entry(&its_nodes, struct its_node, entry); + if (readl_relaxed(its->base + GITS_IIDR) != 0x00051736) + return -EINVAL; + + return probe_devid_pool_one(); +} + /* * Skip ITSs that have no vLPIs mapped, unless we're on GICv4.1, as we * always have vSGIs mapped. @@ -578,6 +761,16 @@ static void its_encode_sgi_intid(struct its_cmd_block *cmd, u8 sgi) its_mask_encode(&cmd->raw_cmd[0], sgi, 35, 32); } +static void its_encode_sgi_intid_extension(struct its_cmd_block *cmd, u8 sgi) +{ + /* + * We reuse the VSGI command in this implementation to configure + * the vPPI or clear its pending state. The vINTID field has been + * therefore extended to [36:32]. + */ + its_mask_encode(&cmd->raw_cmd[0], sgi, 36, 32); +} + static void its_encode_sgi_priority(struct its_cmd_block *cmd, u8 prio) { its_mask_encode(&cmd->raw_cmd[0], prio >> 4, 23, 20); @@ -977,7 +1170,10 @@ static struct its_vpe *its_build_vsgi_cmd(struct its_node *its, its_encode_cmd(cmd, GITS_CMD_VSGI); its_encode_vpeid(cmd, desc->its_vsgi_cmd.vpe->vpe_id); - its_encode_sgi_intid(cmd, desc->its_vsgi_cmd.sgi); + if (!is_vtimer_irqbypass(its)) + its_encode_sgi_intid(cmd, desc->its_vsgi_cmd.sgi); + else + its_encode_sgi_intid_extension(cmd, desc->its_vsgi_cmd.sgi); its_encode_sgi_priority(cmd, desc->its_vsgi_cmd.priority); its_encode_sgi_group(cmd, desc->its_vsgi_cmd.group); its_encode_sgi_clear(cmd, desc->its_vsgi_cmd.clear); @@ -3475,6 +3671,12 @@ static void its_free_device(struct its_device *its_dev) raw_spin_unlock_irqrestore(&its_dev->its->lock, flags); kfree(its_dev->event_map.col_map); kfree(its_dev->itt); + + if (its_dev->is_vdev) { + WARN_ON(!rsv_devid_pool_cap); + free_devid_to_rsv_pools(its_dev); + } + kfree(its_dev); } @@ -3502,6 +3704,8 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, struct msi_domain_info *msi_info; u32 dev_id; int err = 0; + int use_devid_pool = false; + struct rsv_devid_pool *pool = NULL; /* * We ignore "dev" entirely, and rely on the dev_id that has @@ -3511,6 +3715,18 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, */ dev_id = info->scratchpad[0].ul; + if (rsv_devid_pool_cap && !dev->of_node && !dev->fwnode && + info->scratchpad[0].ul == -1) + use_devid_pool = true; + + if (use_devid_pool) { + err = alloc_devid_from_rsv_pools(&pool, &dev_id); + if (err) { + pr_warn("ITS: No remaining device id\n"); + return err; + } + } + msi_info = msi_get_domain_info(domain); its = msi_info->data; @@ -3527,6 +3743,9 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, mutex_lock(&its->dev_alloc_lock); its_dev = its_find_device(its, dev_id); if (its_dev) { + /* Impossible ...*/ + WARN_ON_ONCE(use_devid_pool); + /* * We already have seen this ID, probably through * another alias (PCI bridge of some sort). 
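+	 * (A reserved-pool ID is freshly allocated and can never alias an
+	 * existing device, hence the WARN_ON_ONCE above.)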
No need to @@ -3547,6 +3766,11 @@ static int its_msi_prepare(struct irq_domain *domain, struct device *dev, its_dev->shared = true; pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec)); + + if (use_devid_pool) { + its_dev->is_vdev = true; + its_dev->devid_pool = pool; + } out: mutex_unlock(&its->dev_alloc_lock); info->scratchpad[0].ptr = its_dev; @@ -4204,8 +4428,14 @@ static struct irq_chip its_vpe_4_1_irq_chip = { static void its_configure_sgi(struct irq_data *d, bool clear) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_node *its = find_4_1_its(); struct its_cmd_desc desc; + if (!its || !is_its_vsgi_cmd_valid(its, d->hwirq)) { + pr_err("ITS: its_configure_sgi failed\n"); + return; + } + desc.its_vsgi_cmd.vpe = vpe; desc.its_vsgi_cmd.sgi = d->hwirq; desc.its_vsgi_cmd.priority = vpe->sgi_config[d->hwirq].priority; @@ -4218,7 +4448,7 @@ static void its_configure_sgi(struct irq_data *d, bool clear) * destination VPE is mapped there. Since we map them eagerly at * activation time, we're pretty sure the first GICv4.1 ITS will do. */ - its_send_single_vcommand(find_4_1_its(), its_build_vsgi_cmd, &desc); + its_send_single_vcommand(its, its_build_vsgi_cmd, &desc); } static void its_sgi_mask_irq(struct irq_data *d) @@ -4260,11 +4490,15 @@ static int its_sgi_set_irqchip_state(struct irq_data *d, if (state) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); struct its_node *its = find_4_1_its(); + u64 offset = GITS_SGIR; u64 val; + if (__get_intid_range(d->hwirq) == PPI_RANGE) + offset = GITS_PPIR; + val = FIELD_PREP(GITS_SGIR_VPEID, vpe->vpe_id); val |= FIELD_PREP(GITS_SGIR_VINTID, d->hwirq); - writeq_relaxed(val, its->sgir_base + GITS_SGIR - SZ_128K); + writeq_relaxed(val, its->sgir_base + offset - SZ_128K); } else { its_configure_sgi(d, true); } @@ -4276,6 +4510,7 @@ static int its_sgi_get_irqchip_state(struct irq_data *d, enum irqchip_irq_state which, bool *val) { struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + enum gic_intid_range type; void __iomem *base; unsigned long flags; u32 count = 1000000; /* 1s! */ @@ -4285,6 +4520,17 @@ static int its_sgi_get_irqchip_state(struct irq_data *d, if (which != IRQCHIP_STATE_PENDING) return -EINVAL; + /* + * Plug the HiSilicon implementation details in comment! + * + * For vPPI, we re-use the GICR_VSGIR and GICR_VSGIPENDR in the + * implementation which allows reads to GICR_I{S,C}PENDR to be + * emulated. And note that the pending state of the vtimer + * interrupt is stored at bit[16] of GICR_VSGIPENDR. + */ + type = __get_intid_range(d->hwirq); + WARN_ON_ONCE(type == PPI_RANGE && !is_vtimer_irq(d->hwirq)); + /* * Locking galore! We can race against two different events: * @@ -4320,7 +4566,10 @@ static int its_sgi_get_irqchip_state(struct irq_data *d, if (!count) return -ENXIO; - *val = !!(status & (1 << d->hwirq)); + if (is_vtimer_irq(d->hwirq)) + *val = !!(status & (1 << 16)); + else + *val = !!(status & (1 << d->hwirq)); return 0; } @@ -4359,10 +4608,10 @@ static int its_sgi_irq_domain_alloc(struct irq_domain *domain, struct its_vpe *vpe = args; int i; - /* Yes, we do want 16 SGIs */ - WARN_ON(nr_irqs != 16); + /* We may want 32 IRQs if vtimer irqbypass is supported. 
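+	 * Slots [0..15] are the vSGIs; [16..31] shadow the PPI range, of
+	 * which only the vtimer PPI (INTID 27, see is_vtimer_irq()) is
+	 * used for now.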
*/ + WARN_ON(nr_irqs != 16 && nr_irqs != 32); - for (i = 0; i < 16; i++) { + for (i = 0; i < nr_irqs; i++) { vpe->sgi_config[i].priority = 0; vpe->sgi_config[i].enabled = false; vpe->sgi_config[i].group = false; @@ -5369,7 +5618,8 @@ static struct its_node __init *its_node_init(struct resource *res, its->numa_node = numa_node; its->fwnode_handle = handle; - + if (readl_relaxed(its_base + GITS_IIDR) == 0x00051736) + its->version = readl_relaxed(its_base + GITS_VERSION); return its; out_unmap: @@ -5654,6 +5904,7 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, struct its_node *its; bool has_v4 = false; bool has_v4_1 = false; + bool has_vtimer_irqbypass = false; int err; gic_rdists = rdists; @@ -5677,12 +5928,19 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, list_for_each_entry(its, &its_nodes, entry) { has_v4 |= is_v4(its); has_v4_1 |= is_v4_1(its); + has_vtimer_irqbypass |= is_vtimer_irqbypass(its); } /* Don't bother with inconsistent systems */ if (WARN_ON(!has_v4_1 && rdists->has_rvpeid)) rdists->has_rvpeid = false; + /* vtimer irqbypass depends on rvpeid support */ + if (WARN_ON(!has_v4_1 && has_vtimer_irqbypass)) { + has_vtimer_irqbypass = false; + rdists->has_vtimer = false; + } + if (has_v4 & rdists->has_vlpis) { const struct irq_domain_ops *sgi_ops; @@ -5692,10 +5950,17 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, sgi_ops = NULL; if (its_init_vpe_domain() || - its_init_v4(parent_domain, &its_vpe_domain_ops, sgi_ops)) { + its_init_v4(parent_domain, &its_vpe_domain_ops, + sgi_ops, has_vtimer_irqbypass)) { rdists->has_vlpis = false; pr_err("ITS: Disabling GICv4 support\n"); } + + if (build_devid_pools()) + rsv_devid_pool_cap = false; + + if (rsv_devid_pool_cap) + pr_info("ITS: reserved device id pools enabled\n"); } register_syscore_ops(&its_syscore_ops); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 874fbc4a2da69c62bedf124d75bc9bec183a57d8..0421cf9722f55f05b6f4ecdaeb7bfb482903753b 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -139,34 +139,9 @@ static DEFINE_PER_CPU(bool, has_rss); /* Our default, arbitrary priority value. Linux only uses one anyway. */ #define DEFAULT_PMR_VALUE 0xf0 -enum gic_intid_range { - SGI_RANGE, - PPI_RANGE, - SPI_RANGE, - EPPI_RANGE, - ESPI_RANGE, - LPI_RANGE, - __INVALID_RANGE__ -}; - -static enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq) -{ - switch (hwirq) { - case 0 ... 15: - return SGI_RANGE; - case 16 ... 31: - return PPI_RANGE; - case 32 ... 1019: - return SPI_RANGE; - case EPPI_BASE_INTID ... (EPPI_BASE_INTID + 63): - return EPPI_RANGE; - case ESPI_BASE_INTID ... (ESPI_BASE_INTID + 1023): - return ESPI_RANGE; - case 8192 ... 
GENMASK(23, 0): - return LPI_RANGE; - default: - return __INVALID_RANGE__; - } +phys_addr_t get_gicr_paddr(int cpu) +{ + return (per_cpu_ptr(gic_data.rdists.rdist, cpu))->phys_base; } static enum gic_intid_range get_intid_range(struct irq_data *d) @@ -1110,6 +1085,9 @@ static int __gic_update_rdist_properties(struct redist_region *region, gic_data.rdists.has_rvpeid = false; } + /* HiSilicon implement: if GICv4.1 is supported, vtimer irqbypass is supported */ + gic_data.rdists.has_vtimer = gic_data.rdists.has_rvpeid; + gic_data.ppi_nr = min(GICR_TYPER_NR_PPIS(typer), gic_data.ppi_nr); return 1; @@ -2065,6 +2043,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gic_data.rdists.has_vlpis = true; gic_data.rdists.has_direct_lpi = true; gic_data.rdists.has_vpend_valid_dirty = true; + gic_data.rdists.has_vtimer = false; } if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { @@ -2241,6 +2220,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; + gic_v3_kvm_info.has_vtimer = gic_data.rdists.has_vtimer; vgic_set_kvm_info(&gic_v3_kvm_info); } @@ -2583,6 +2563,7 @@ static void __init gic_acpi_setup_kvm_info(void) gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; + gic_v3_kvm_info.has_vtimer = gic_data.rdists.has_vtimer; vgic_set_kvm_info(&gic_v3_kvm_info); } diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 94d56a03b1757f1336587898de72853bc65abf35..b804a55efec7130ae5ca87e94a19fd76b582a83b 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -86,6 +86,7 @@ static struct irq_domain *gic_domain; static const struct irq_domain_ops *vpe_domain_ops; static const struct irq_domain_ops *sgi_domain_ops; +static bool vtimer_irqbypass; #ifdef CONFIG_ARM64 #include @@ -110,6 +111,11 @@ static bool has_v4_1(void) return !!sgi_domain_ops; } +static bool has_v4_1_vsgi_extend(void) +{ + return has_v4_1() && vtimer_irqbypass; +} + static bool has_v4_1_sgi(void) { return has_v4_1() && gic_cpuif_has_vsgi(); @@ -119,10 +125,14 @@ static int its_alloc_vcpu_sgis(struct its_vpe *vpe, int idx) { char *name; int sgi_base; + int nr_irqs = 16; if (!has_v4_1_sgi()) return 0; + if (has_v4_1_vsgi_extend()) + nr_irqs = 32; + name = kasprintf(GFP_KERNEL, "GICv4-sgi-%d", task_pid_nr(current)); if (!name) goto err; @@ -134,18 +144,20 @@ static int its_alloc_vcpu_sgis(struct its_vpe *vpe, int idx) kfree(name); name = NULL; - vpe->sgi_domain = irq_domain_create_linear(vpe->fwnode, 16, + vpe->sgi_domain = irq_domain_create_linear(vpe->fwnode, nr_irqs, sgi_domain_ops, vpe); if (!vpe->sgi_domain) goto err; - sgi_base = irq_domain_alloc_irqs(vpe->sgi_domain, 16, NUMA_NO_NODE, vpe); + vpe->nr_irqs = nr_irqs; + sgi_base = irq_domain_alloc_irqs(vpe->sgi_domain, nr_irqs, NUMA_NO_NODE, vpe); if (sgi_base <= 0) goto err; return 0; err: + vpe->nr_irqs = 0; if (vpe->sgi_domain) irq_domain_remove(vpe->sgi_domain); if (vpe->fwnode) @@ -211,7 +223,7 @@ static void its_free_sgi_irqs(struct its_vm *vm) if (WARN_ON(!irq)) continue; - irq_domain_free_irqs(irq, 16); + irq_domain_free_irqs(irq, vm->vpes[i]->nr_irqs); irq_domain_remove(vm->vpes[i]->sgi_domain); irq_domain_free_fwnode(vm->vpes[i]->fwnode); } @@ -374,13 +386,16 @@ int its_prop_update_vsgi(int irq, u8 priority, bool group) int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *vpe_ops, - const struct 
irq_domain_ops *sgi_ops) + const struct irq_domain_ops *sgi_ops, + bool has_vtimer_irqbypass) { if (domain) { pr_info("ITS: Enabling GICv4 support\n"); gic_domain = domain; vpe_domain_ops = vpe_ops; sgi_domain_ops = sgi_ops; + vtimer_irqbypass = has_vtimer_irqbypass; + return 0; } diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 5101a3fb11df5bef53122db9db3c194669d754e7..fd190c460a32c1fa80eb24e334bed1d17dc3e7e3 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -15,6 +15,15 @@ #include #include #include +#include +#include +#include +#include +#include +#include + +#include +#include /* Interrupt numbers per mbigen node supported */ #define IRQS_PER_MBIGEN_NODE 128 @@ -53,6 +62,50 @@ */ #define REG_MBIGEN_TYPE_OFFSET 0x0 +#define MBIGEN_CTLR 0x0 +#define MBIGEN_AFF3_MASK 0xff000000 +#define MBIGEN_AFF3_SHIFT 24 + +/** + * MBIX config register + * bit[25:24] mbi_type: + * - 0b10 support vtimer irqbypass + */ +#define MBIGEN_NODE_CFG_OFFSET 0x0004 +#define MBIGEN_TYPE_MASK 0x03000000 +#define MBIGEN_TYPE_SHIFT 24 +#define TYPE_VTIMER_ENABLED 0x02 + +#define VTIMER_MBIGEN_REG_WIDTH 4 +#define PPIS_PER_MBIGEN_NODE 32 +#define VTIMER_MBIGEN_REG_TYPE_OFFSET 0x1000 +#define VTIMER_MBIGEN_REG_SET_AUTO_CLR_OFFSET 0x1100 +#define VTIMER_MBIGEN_REG_CLR_AUTO_CLR_OFFSET 0x1110 +#define VTIMER_MBIGEN_REG_ATV_STAT_OFFSET 0x1120 +#define VTIMER_GIC_REG_SET_AUTO_CLR_OFFSET 0x1150 +#define VTIMER_GIC_REG_CLR_AUTO_CLR_OFFSET 0x1160 +#define VTIMER_MBIGEN_REG_VEC_OFFSET 0x1200 +#define VTIMER_MBIGEN_REG_ATV_CLR_OFFSET 0xa008 + +/** + * struct vtimer_mbigen_device - holds the information of vtimer mbigen device. + * + * @base: mapped address of this mbigen chip. + * @cpu_base : the base cpu_id attached to the mbigen chip. + * @cpu_num : the num of the cpus attached to the mbigen chip. + * @mpidr_aff3 : [socket_id : die_id] of the mbigen chip. + * @entry: list_head connecting this vtimer_mbigen to the full list. + * @vmgn_lock: spinlock for set type. + */ +struct vtimer_mbigen_device { + void __iomem *base; + int cpu_base; + int cpu_num; + int mpidr_aff3; + struct list_head entry; + spinlock_t vmgn_lock; +}; + /** * struct mbigen_device - holds the information of mbigen device. * @@ -62,8 +115,181 @@ struct mbigen_device { struct platform_device *pdev; void __iomem *base; + struct vtimer_mbigen_device *vtimer_mbigen_chip; }; +static LIST_HEAD(vtimer_mgn_list); + +cpumask_t vtimer_cpu_mask; + +/** + * Due to the existence of hyper-threading technology, We need to get the + * absolute offset of a cpu relative to the base cpu. 
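+ * Each redistributor here occupies a 0x40000 (256K) stride - the four
+ * 64K GICv4 frames (RD, SGI, VLPI, reserved) - so the pin index is,
+ * roughly:
+ *
+ *   pin = (gicr_paddr(cpu) - gicr_paddr(cpu_base)) / GICR_LENGTH;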
+ */ +#define GICR_LENGTH 0x40000 +static inline int get_abs_offset(int cpu, int cpu_base) +{ + return ((get_gicr_paddr(cpu) - get_gicr_paddr(cpu_base)) / GICR_LENGTH); +} + +static struct vtimer_mbigen_device *get_vtimer_mbigen(int cpu_id) +{ + unsigned int mpidr_aff3; + struct vtimer_mbigen_device *chip; + + mpidr_aff3 = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu_id), 3); + + list_for_each_entry(chip, &vtimer_mgn_list, entry) { + if (chip->mpidr_aff3 == mpidr_aff3) + return chip; + } + + pr_debug("Failed to get vtimer mbigen of cpu%d!\n", cpu_id); + return NULL; +} + +void vtimer_mbigen_set_vector(int cpu_id, u16 vpeid) +{ + + struct vtimer_mbigen_device *chip; + void __iomem *addr; + int cpu_abs_offset, count = 100; + + chip = get_vtimer_mbigen(cpu_id); + if (!chip) + return; + + cpu_abs_offset = get_abs_offset(cpu_id, chip->cpu_base); + addr = chip->base + VTIMER_MBIGEN_REG_VEC_OFFSET + + cpu_abs_offset * VTIMER_MBIGEN_REG_WIDTH; + + writel_relaxed(vpeid, addr); + + /* Make sure correct vpeid set */ + do { + if (readl_relaxed(addr) == vpeid) + break; + } while (count--); + + if (!count) + pr_err("Failed to set mbigen vector of CPU%d!\n", cpu_id); +} + +bool vtimer_mbigen_get_active(int cpu_id) +{ + struct vtimer_mbigen_device *chip; + void __iomem *addr; + int cpu_abs_offset; + u32 val; + + chip = get_vtimer_mbigen(cpu_id); + if (!chip) + return false; + + cpu_abs_offset = get_abs_offset(cpu_id, chip->cpu_base); + addr = chip->base + VTIMER_MBIGEN_REG_ATV_STAT_OFFSET + + (cpu_abs_offset / PPIS_PER_MBIGEN_NODE) * VTIMER_MBIGEN_REG_WIDTH; + + dsb(sy); + val = readl_relaxed(addr); + return (!!(val & (1 << (cpu_abs_offset % PPIS_PER_MBIGEN_NODE)))); +} + +void vtimer_mbigen_set_auto_clr(int cpu_id, bool set) +{ + struct vtimer_mbigen_device *chip; + void __iomem *addr; + int cpu_abs_offset; + u64 offset; + u32 val; + + chip = get_vtimer_mbigen(cpu_id); + if (!chip) + return; + + cpu_abs_offset = get_abs_offset(cpu_id, chip->cpu_base); + offset = set ? VTIMER_MBIGEN_REG_SET_AUTO_CLR_OFFSET : + VTIMER_MBIGEN_REG_CLR_AUTO_CLR_OFFSET; + addr = chip->base + offset + + (cpu_abs_offset / PPIS_PER_MBIGEN_NODE) * VTIMER_MBIGEN_REG_WIDTH; + val = 1 << (cpu_abs_offset % PPIS_PER_MBIGEN_NODE); + + writel_relaxed(val, addr); + dsb(sy); +} + +void vtimer_gic_set_auto_clr(int cpu_id, bool set) +{ + struct vtimer_mbigen_device *chip; + void __iomem *addr; + int cpu_abs_offset; + u64 offset; + u32 val; + + chip = get_vtimer_mbigen(cpu_id); + if (!chip) + return; + + cpu_abs_offset = get_abs_offset(cpu_id, chip->cpu_base); + offset = set ? VTIMER_GIC_REG_SET_AUTO_CLR_OFFSET : + VTIMER_GIC_REG_CLR_AUTO_CLR_OFFSET; + addr = chip->base + offset + + (cpu_abs_offset / PPIS_PER_MBIGEN_NODE) * VTIMER_MBIGEN_REG_WIDTH; + val = 1 << (cpu_abs_offset % PPIS_PER_MBIGEN_NODE); + + writel_relaxed(val, addr); + dsb(sy); +} + +void vtimer_mbigen_set_active(int cpu_id, bool set) +{ + struct vtimer_mbigen_device *chip; + void __iomem *addr; + int cpu_abs_offset; + u64 offset; + u32 val; + + chip = get_vtimer_mbigen(cpu_id); + if (!chip) + return; + + cpu_abs_offset = get_abs_offset(cpu_id, chip->cpu_base); + offset = set ? 
+
+void vtimer_mbigen_set_active(int cpu_id, bool set)
+{
+	struct vtimer_mbigen_device *chip;
+	void __iomem *addr;
+	int cpu_abs_offset;
+	u64 offset;
+	u32 val;
+
+	chip = get_vtimer_mbigen(cpu_id);
+	if (!chip)
+		return;
+
+	cpu_abs_offset = get_abs_offset(cpu_id, chip->cpu_base);
+	offset = set ? VTIMER_MBIGEN_REG_ATV_STAT_OFFSET :
+		       VTIMER_MBIGEN_REG_ATV_CLR_OFFSET;
+	addr = chip->base + offset +
+	       (cpu_abs_offset / PPIS_PER_MBIGEN_NODE) * VTIMER_MBIGEN_REG_WIDTH;
+	val = 1 << (cpu_abs_offset % PPIS_PER_MBIGEN_NODE);
+
+	writel_relaxed(val, addr);
+	dsb(sy);
+}
+
+static int vtimer_mbigen_set_type(unsigned int cpu_id)
+{
+	struct vtimer_mbigen_device *chip;
+	void __iomem *addr;
+	int cpu_abs_offset;
+	u32 val, mask;
+
+	chip = get_vtimer_mbigen(cpu_id);
+	if (!chip)
+		return -EINVAL;
+
+	cpu_abs_offset = get_abs_offset(cpu_id, chip->cpu_base);
+	addr = chip->base + VTIMER_MBIGEN_REG_TYPE_OFFSET +
+	       (cpu_abs_offset / PPIS_PER_MBIGEN_NODE) * VTIMER_MBIGEN_REG_WIDTH;
+
+	mask = 1 << (cpu_abs_offset % PPIS_PER_MBIGEN_NODE);
+
+	/* Read-modify-write, so serialize against other CPUs. */
+	spin_lock(&chip->vmgn_lock);
+	val = readl_relaxed(addr);
+	val |= mask;
+	writel_relaxed(val, addr);
+	dsb(sy);
+	spin_unlock(&chip->vmgn_lock);
+	return 0;
+}
+
 static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq)
 {
 	unsigned int nid, pin;
@@ -338,6 +564,245 @@ static inline int mbigen_acpi_create_domain(struct platform_device *pdev,
 }
 #endif
 
+static void vtimer_mbigen_set_kvm_info(void)
+{
+	struct arch_timer_kvm_info *info = arch_timer_get_kvm_info();
+
+	info->irqbypass_flag |= VT_EXPANDDEV_PROBED;
+}
+
+static int vtimer_mbigen_chip_read_aff3(struct vtimer_mbigen_device *chip)
+{
+	void __iomem *base = chip->base;
+	void __iomem *addr = base + MBIGEN_CTLR;
+	u32 val = readl_relaxed(addr);
+
+	return (val & MBIGEN_AFF3_MASK) >> MBIGEN_AFF3_SHIFT;
+}
+
+static int vtimer_mbigen_chip_match_cpu(struct vtimer_mbigen_device *chip)
+{
+	int cpu;
+
+	chip->cpu_base = -1;
+	chip->cpu_num = 0;
+
+	for_each_possible_cpu(cpu) {
+		int mpidr_aff3 = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 3);
+
+		if (chip->mpidr_aff3 == mpidr_aff3) {
+			/* get the first cpu attached to the mbigen */
+			if (chip->cpu_base == -1) {
+				/* Make sure cpu_base is attached to PIN0 */
+				u64 mpidr = cpu_logical_map(cpu);
+
+				if (!MPIDR_AFFINITY_LEVEL(mpidr, 2) &&
+				    !MPIDR_AFFINITY_LEVEL(mpidr, 1) &&
+				    !MPIDR_AFFINITY_LEVEL(mpidr, 0))
+					chip->cpu_base = cpu;
+			}
+
+			chip->cpu_num++;
+			/*
+			 * check if this cpu has already been
+			 * attached to another mbigen
+			 */
+			if (cpumask_test_and_set_cpu(cpu, &vtimer_cpu_mask)) {
+				WARN_ON_ONCE(1);
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (chip->cpu_base == -1 || chip->cpu_num > IRQS_PER_MBIGEN_NODE)
+		return -EINVAL;
+
+	return 0;
+}
+
+static bool is_mbigen_vtimer_bypass_enabled(struct mbigen_device *mgn_chip)
+{
+	void __iomem *base = mgn_chip->base;
+	void __iomem *addr = base + MBIGEN_NODE_CFG_OFFSET;
+	u32 val = readl_relaxed(addr);
+
+	return ((val & MBIGEN_TYPE_MASK) >> MBIGEN_TYPE_SHIFT) ==
+	       TYPE_VTIMER_ENABLED;
+}
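The pin-0 check in vtimer_mbigen_chip_match_cpu() above keys off the MPIDR affinity fields; a made-up value may make it concrete (sketch only):

/* Illustration only: decoding a hypothetical MPIDR_EL1 value */
u64 mpidr = 0x8100000000ULL;	/* Aff3 = 0x81, Aff2 = Aff1 = Aff0 = 0 */

/* Aff3 selects the mbigen chip (socket/die)... */
int aff3 = MPIDR_AFFINITY_LEVEL(mpidr, 3);	/* 0x81 */

/* ...and a CPU with Aff2 == Aff1 == Aff0 == 0 sits on PIN0 of that chip. */
bool is_pin0 = !MPIDR_AFFINITY_LEVEL(mpidr, 2) &&
	       !MPIDR_AFFINITY_LEVEL(mpidr, 1) &&
	       !MPIDR_AFFINITY_LEVEL(mpidr, 0);	/* true */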
+
+/*
+ * MBIX_VPPI_ITS_TA: indicates the address of the ITS corresponding
+ * to the mbigen.
+ */
+#define MBIX_VPPI_ITS_TA	0x0038
+static bool vtimer_mbigen_should_probe(struct mbigen_device *mgn_chip)
+{
+	unsigned int mpidr_aff3;
+	struct vtimer_mbigen_device *chip;
+	void __iomem *addr;
+	u32 val;
+
+	/* find the valid mbigen */
+	addr = mgn_chip->base + MBIX_VPPI_ITS_TA;
+	val = readl_relaxed(addr);
+	if (!val)
+		return false;
+
+	addr = mgn_chip->base + MBIGEN_CTLR;
+	val = readl_relaxed(addr);
+	mpidr_aff3 = (val & MBIGEN_AFF3_MASK) >> MBIGEN_AFF3_SHIFT;
+	list_for_each_entry(chip, &vtimer_mgn_list, entry) {
+		if (chip->mpidr_aff3 == mpidr_aff3)
+			return false;
+	}
+
+	return true;
+}
+
+#define CHIP0_TA_MBIGEN_PHY_BASE	0x4604400000
+#define CHIP0_TA_MBIGEN_ITS_BASE	0x84028
+#define CHIP0_TA_PERI_PHY_BASE		0x4614002018
+
+#define CHIP0_TB_MBIGEN_PHY_BASE	0xc604400000
+#define CHIP0_TB_MBIGEN_ITS_BASE	0x4028
+#define CHIP0_TB_PERI_PHY_BASE		0xc614002018
+
+#define CHIP1_TA_MBIGEN_PHY_BASE	0x204604400000
+#define CHIP1_TA_MBIGEN_ITS_BASE	0x2084028
+#define CHIP1_TA_PERI_PHY_BASE		0x204614002018
+
+#define CHIP1_TB_MBIGEN_PHY_BASE	0x20c604400000
+#define CHIP1_TB_MBIGEN_ITS_BASE	0x2004028
+#define CHIP1_TB_PERI_PHY_BASE		0x20c614002018
+
+extern bool vtimer_irqbypass;
+
+static int vtimer_mbigen_set_regs(struct platform_device *pdev)
+{
+	struct mbigen_device *mgn_chip = platform_get_drvdata(pdev);
+	struct vtimer_mbigen_device *chip;
+	struct resource *res;
+	void __iomem *addr;
+	unsigned int mpidr_aff3;
+	u32 val;
+
+	addr = mgn_chip->base + MBIGEN_CTLR;
+	val = readl_relaxed(addr);
+	mpidr_aff3 = (val & MBIGEN_AFF3_MASK) >> MBIGEN_AFF3_SHIFT;
+	list_for_each_entry(chip, &vtimer_mgn_list, entry) {
+		if (chip->mpidr_aff3 == mpidr_aff3)
+			return 0;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	if (res->start == CHIP0_TA_MBIGEN_PHY_BASE) {
+		addr = ioremap(CHIP0_TA_PERI_PHY_BASE, 4);
+		if (!addr) {
+			pr_err("Unable to map CHIP0-TA-PERI\n");
+			return -ENOMEM;
+		}
+
+		writel_relaxed(1, addr);
+		iounmap(addr);
+
+		addr = mgn_chip->base + MBIX_VPPI_ITS_TA;
+		writel_relaxed(CHIP0_TA_MBIGEN_ITS_BASE, addr);
+	}
+
+	if (res->start == CHIP0_TB_MBIGEN_PHY_BASE) {
+		addr = ioremap(CHIP0_TB_PERI_PHY_BASE, 4);
+		if (!addr) {
+			pr_err("Unable to map CHIP0-TB-PERI\n");
+			return -ENOMEM;
+		}
+
+		writel_relaxed(1, addr);
+		iounmap(addr);
+
+		addr = mgn_chip->base + MBIX_VPPI_ITS_TA;
+		writel_relaxed(CHIP0_TB_MBIGEN_ITS_BASE, addr);
+	}
+
+	if (res->start == CHIP1_TA_MBIGEN_PHY_BASE) {
+		addr = ioremap(CHIP1_TA_PERI_PHY_BASE, 4);
+		if (!addr) {
+			pr_err("Unable to map CHIP1-TA-PERI\n");
+			return -ENOMEM;
+		}
+
+		writel_relaxed(1, addr);
+		iounmap(addr);
+
+		addr = mgn_chip->base + MBIX_VPPI_ITS_TA;
+		writel_relaxed(CHIP1_TA_MBIGEN_ITS_BASE, addr);
+	}
+
+	if (res->start == CHIP1_TB_MBIGEN_PHY_BASE) {
+		addr = ioremap(CHIP1_TB_PERI_PHY_BASE, 4);
+		if (!addr) {
+			pr_err("Unable to map CHIP1-TB-PERI\n");
+			return -ENOMEM;
+		}
+
+		writel_relaxed(1, addr);
+		iounmap(addr);
+
+		addr = mgn_chip->base + MBIX_VPPI_ITS_TA;
+		writel_relaxed(CHIP1_TB_MBIGEN_ITS_BASE, addr);
+	}
+
+	return 0;
+}
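The four per-chip branches in vtimer_mbigen_set_regs() differ only in three constants; if more chips ever need supporting, a table would make the pattern clearer. A possible refactor (sketch only, not part of the patch; the struct and array names are invented):

/* Sketch: table-driven variant of the per-chip fixups above */
struct vtimer_mgn_fixup {
	resource_size_t	mgn_base;	/* mbigen MMIO base to match  */
	phys_addr_t	peri_base;	/* PERI enable register       */
	u32		its_ta;		/* value for MBIX_VPPI_ITS_TA */
};

static const struct vtimer_mgn_fixup fixups[] = {
	{ CHIP0_TA_MBIGEN_PHY_BASE, CHIP0_TA_PERI_PHY_BASE, CHIP0_TA_MBIGEN_ITS_BASE },
	{ CHIP0_TB_MBIGEN_PHY_BASE, CHIP0_TB_PERI_PHY_BASE, CHIP0_TB_MBIGEN_ITS_BASE },
	{ CHIP1_TA_MBIGEN_PHY_BASE, CHIP1_TA_PERI_PHY_BASE, CHIP1_TA_MBIGEN_ITS_BASE },
	{ CHIP1_TB_MBIGEN_PHY_BASE, CHIP1_TB_PERI_PHY_BASE, CHIP1_TB_MBIGEN_ITS_BASE },
};

/* ...and the matching loop replacing the four if-blocks: */
	for (i = 0; i < ARRAY_SIZE(fixups); i++) {
		if (res->start != fixups[i].mgn_base)
			continue;

		addr = ioremap(fixups[i].peri_base, 4);
		if (!addr)
			return -ENOMEM;
		writel_relaxed(1, addr);
		iounmap(addr);

		writel_relaxed(fixups[i].its_ta,
			       mgn_chip->base + MBIX_VPPI_ITS_TA);
		break;
	}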
+
+static int vtimer_mbigen_device_probe(struct platform_device *pdev)
+{
+	struct mbigen_device *mgn_chip = platform_get_drvdata(pdev);
+	struct vtimer_mbigen_device *vtimer_mgn_chip;
+	int err;
+
+	if (!vtimer_irqbypass)
+		return 0;
+
+	err = vtimer_mbigen_set_regs(pdev);
+	if (err)
+		return err;
+
+	if (!is_mbigen_vtimer_bypass_enabled(mgn_chip) ||
+	    !vtimer_mbigen_should_probe(mgn_chip))
+		return 0;
+
+	vtimer_mgn_chip = kzalloc(sizeof(*vtimer_mgn_chip), GFP_KERNEL);
+	if (!vtimer_mgn_chip)
+		return -ENOMEM;
+
+	mgn_chip->vtimer_mbigen_chip = vtimer_mgn_chip;
+	vtimer_mgn_chip->base = mgn_chip->base;
+	vtimer_mgn_chip->mpidr_aff3 = vtimer_mbigen_chip_read_aff3(vtimer_mgn_chip);
+	err = vtimer_mbigen_chip_match_cpu(vtimer_mgn_chip);
+	if (err) {
+		dev_err(&pdev->dev,
+			"Failed to match vtimer mbigen device with cpu\n");
+		goto out;
+	}
+
+	spin_lock_init(&vtimer_mgn_chip->vmgn_lock);
+	list_add(&vtimer_mgn_chip->entry, &vtimer_mgn_list);
+	vtimer_mbigen_set_kvm_info();
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "irqchip/mbigen-vtimer:online",
+			  vtimer_mbigen_set_type, NULL);
+
+	pr_info("vtimer mbigen device @%p probed successfully!\n",
+		mgn_chip->base);
+	return 0;
+
+out:
+	kfree(vtimer_mgn_chip);
+	dev_err(&pdev->dev, "vtimer mbigen device @%p probe failed\n",
+		mgn_chip->base);
+	return err;
+}
+
 static int mbigen_device_probe(struct platform_device *pdev)
 {
 	struct mbigen_device *mgn_chip;
@@ -374,6 +839,14 @@ static int mbigen_device_probe(struct platform_device *pdev)
 	}
 
 	platform_set_drvdata(pdev, mgn_chip);
+
+	err = vtimer_mbigen_device_probe(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to probe vtimer mbigen device\n");
+		return err;
+	}
+
 	return 0;
 }
 
@@ -393,7 +866,18 @@ static struct platform_driver mbigen_platform_driver = {
 	.probe			= mbigen_device_probe,
 };
 
-module_platform_driver(mbigen_platform_driver);
+static int __init mbigen_init(void)
+{
+	return platform_driver_register(&mbigen_platform_driver);
+}
+
+static void __exit mbigen_exit(void)
+{
+	platform_driver_unregister(&mbigen_platform_driver);
+}
+
+/* Register at arch_initcall time so mbigen chips probe before their consumers. */
+arch_initcall(mbigen_init);
+module_exit(mbigen_exit);
 
 MODULE_AUTHOR("Jun Ma <majun258@huawei.com>");
 MODULE_AUTHOR("Yun Wu <wuyun.wu@huawei.com>");
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index cadd4a820c03364ae1c3afd32278dcdcace8cb61..30897ad1bbf79aead6d1ee44cac1dab626e8c478 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -493,6 +493,15 @@ config HISI_HIKEY_USB
 	  switching between the dual-role USB-C port and the USB-A host ports
 	  using only one USB controller.
 
+config VIRT_PLAT_DEV
+	tristate "virt platform device driver"
+	depends on KVM && ARM64 && ARCH_HISI
+	default y
+	help
+	  Enable this configuration option to probe the virtual platform
+	  device, which is created for the QEMU-emulated device to implement
+	  virtual MSI direct injection.
+
 config OPEN_DICE
 	tristate "Open Profile for DICE driver"
 	depends on OF_RESERVED_MEM
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index f2a4d1ff65d46a2a014b6e40ed737d26a68a25d0..4592c454be3bbe5fea4186fb8e41fd54a6d07109 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_PVPANIC)		+= pvpanic/
 obj-$(CONFIG_UACCE)		+= uacce/
 obj-$(CONFIG_XILINX_SDFEC)	+= xilinx_sdfec.o
 obj-$(CONFIG_HISI_HIKEY_USB)	+= hisi_hikey_usb.o
+obj-$(CONFIG_VIRT_PLAT_DEV)	+= virt_plat_dev.o
 obj-$(CONFIG_HI6421V600_IRQ)	+= hi6421v600-irq.o
 obj-$(CONFIG_OPEN_DICE)		+= open-dice.o
 obj-$(CONFIG_GP_PCI1XXXX)	+= mchp_pci1xxxx/
diff --git a/drivers/misc/virt_plat_dev.c b/drivers/misc/virt_plat_dev.c
new file mode 100644
index 0000000000000000000000000000000000000000..902d2a7ad0f09a7041b896b77e4b9a9872b46545
--- /dev/null
+++ b/drivers/misc/virt_plat_dev.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019-2020 HUAWEI TECHNOLOGIES CO., LTD., All Rights Reserved.
+ * Author: Wanghaibin <wanghaibin.wang@huawei.com>
+ */
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/platform_device.h>
+
+#define VIRT_DEV_DEBUG 1
+
+#ifdef VIRT_DEV_DEBUG
+#define virtdev_info(fmt, ...)	pr_info("virdev: " fmt, ## __VA_ARGS__)
+#else
+#define virtdev_info(fmt, ...)
+#endif
+
+static irqreturn_t virt_irq_handle(int irq, void *data)
+{
+	return IRQ_HANDLED;
+}
+
+static void virt_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+}
+
+static int virt_device_probe(struct platform_device *pdev)
+{
+	struct msi_desc *desc, *failed_desc = NULL;
+	unsigned int *drvdata = dev_get_drvdata(&pdev->dev);
+	unsigned int nvec;
+	struct irq_domain *vp_irqdomain = vp_get_irq_domain();
+	int ret;
+
+	if (!drvdata)
+		return -EINVAL;
+	nvec = *drvdata;
+
+	if (!vp_irqdomain)
+		return -ENXIO;
+
+	virtdev_info("Allocate platform msi irqs nvecs: %d\n", nvec);
+	dev_set_msi_domain(&pdev->dev, vp_irqdomain);
+
+	ret = platform_msi_domain_alloc_irqs(&pdev->dev, nvec,
+					     virt_write_msi_msg);
+	if (ret) {
+		pr_err("Allocate platform msi irqs failed %d\n", ret);
+		goto error;
+	}
+
+	virtdev_info("Allocate platform msi irqs succeeded\n");
+	msi_for_each_desc(desc, &pdev->dev, MSI_DESC_ALL) {
+		virtdev_info("Request irq %d\n", desc->irq);
+		ret = request_irq(desc->irq, virt_irq_handle, 0,
+				  "virt_dev_host", pdev);
+		if (ret) {
+			pr_err("Request irq %d failed %d\n", desc->irq, ret);
+			failed_desc = desc;
+			goto error_free_irqs;
+		}
+	}
+
+	virtdev_info("Init virtual platform device driver successfully.\n");
+	return 0;
+
+error_free_irqs:
+	/* Only free the irqs which were actually requested before the failure. */
+	msi_for_each_desc(desc, &pdev->dev, MSI_DESC_ALL) {
+		if (desc == failed_desc)
+			break;
+		free_irq(desc->irq, pdev);
+	}
+
+	platform_msi_domain_free_irqs(&pdev->dev);
+error:
+	return ret;
+}
+
+static int virt_device_remove(struct platform_device *pdev)
+{
+	struct msi_desc *desc;
+
+	virtdev_info("%s\n", __func__);
+	msi_for_each_desc(desc, &pdev->dev, MSI_DESC_ALL)
+		free_irq(desc->irq, pdev);
+
+	platform_msi_domain_free_irqs(&pdev->dev);
+
+	return 0;
+}
+
+static struct platform_driver virtdev_driver = {
+	.driver = {
+		/* The device & driver name must match for the bus to bind them. */
+		.name = "virt_plat_dev",
+	},
+	.probe = virt_device_probe,
+	.remove = virt_device_remove,
+};
+
+static int __init virtdev_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&virtdev_driver);
+	if (ret) {
+		pr_err("Register virtdev platform driver failed (%d)\n", ret);
+		return ret;
+	}
+
+	virtdev_info("Register virtdev platform driver succeeded.\n");
+	return 0;
+}
+module_init(virtdev_init);
+
+static void __exit virtdev_exit(void)
+{
+	platform_driver_unregister(&virtdev_driver);
+}
+module_exit(virtdev_exit);
+
+MODULE_LICENSE("GPL v2");
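For context, the driver above matches purely by name and reads its vector count through dev_get_drvdata(), so whoever instantiates the device has to set drvdata before registration. A sketch of the expected pairing (illustrative only; the actual creation site in this series presumably lives in the KVM shadow-device code, and the helper name here is invented):

/* Sketch: instantiating the device that virt_plat_dev binds to */
static int create_virt_plat_dev(unsigned int *nvecs)
{
	struct platform_device *pdev;
	int ret;

	pdev = platform_device_alloc("virt_plat_dev", PLATFORM_DEVID_AUTO);
	if (!pdev)
		return -ENOMEM;

	/* probe() reads the vector count through dev_get_drvdata() */
	dev_set_drvdata(&pdev->dev, nvecs);

	ret = platform_device_add(pdev);
	if (ret)
		platform_device_put(pdev);

	return ret;
}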
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index cbbc9a6dc571587db49b5e9b965f9e3610b83c55..a9aacb15c967caf783176ae29f68bc43c1f8ef93 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -67,6 +67,10 @@ struct arch_timer_kvm_info {
 	struct timecounter timecounter;
 	int virtual_irq;
 	int physical_irq;
+
+/* vtimer expand device probed flag */
+#define VT_EXPANDDEV_PROBED	(1 << 0)
+
+	unsigned long irqbypass_flag;
 };
 
 struct arch_timer_mem_frame {
@@ -109,4 +113,33 @@ static inline bool arch_timer_evtstrm_available(void)
 
 #endif
 
+static inline bool vtimer_irqbypass_hw_support(struct arch_timer_kvm_info *info)
+{
+	return info->irqbypass_flag & VT_EXPANDDEV_PROBED;
+}
+
+#ifdef CONFIG_HISILICON_IRQ_MBIGEN
+
+void vtimer_mbigen_set_vector(int cpu_id, u16 vpeid);
+bool vtimer_mbigen_get_active(int cpu_id);
+void vtimer_mbigen_set_auto_clr(int cpu_id, bool set);
+void vtimer_gic_set_auto_clr(int cpu_id, bool set);
+void vtimer_mbigen_set_active(int cpu_id, bool set);
+
+#else
+
+static inline void vtimer_mbigen_set_vector(int cpu_id, u16 vpeid) {}
+
+static inline bool vtimer_mbigen_get_active(int cpu_id)
+{
+	/* You really shouldn't get here. */
+	return false;
+}
+
+static inline void vtimer_mbigen_set_auto_clr(int cpu_id, bool set) {}
+static inline void vtimer_gic_set_auto_clr(int cpu_id, bool set) {}
+static inline void vtimer_mbigen_set_active(int cpu_id, bool set) {}
+
+#endif
+
 #endif
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index e748bc957d83262233b3c8a655ed68b0ad8f34dd..d9946067dd4e2f0dae0fbc7ebac1b6b91653a9cf 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -75,6 +75,13 @@ struct arch_timer_context {
 	u32				host_timer_irq;
 };
 
+struct vtimer_mbigen_context {
+	/* Active state in vtimer mbigen */
+	bool	active;
+
+	/* Whether this context is currently loaded on a physical CPU */
+	bool	loaded;
+};
+
 struct timer_map {
 	struct arch_timer_context *direct_vtimer;
 	struct arch_timer_context *direct_ptimer;
@@ -92,10 +99,14 @@ struct arch_timer_cpu {
 
 	/* Is the timer enabled */
 	bool			enabled;
+
+	/* Info for vtimer mbigen device */
+	struct vtimer_mbigen_context	mbigen_ctx;
 };
 
 int __init kvm_timer_hyp_init(bool has_gic);
 int kvm_timer_enable(struct kvm_vcpu *vcpu);
+int kvm_vtimer_config(struct kvm *kvm);
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_timer_sync_user(struct kvm_vcpu *vcpu);
@@ -126,6 +137,8 @@ void kvm_timer_init_vhe(void);
 #define vcpu_hvtimer(v)	(&(v)->arch.timer_cpu.timers[TIMER_HVTIMER])
 #define vcpu_hptimer(v)	(&(v)->arch.timer_cpu.timers[TIMER_HPTIMER])
 
+#define vcpu_vtimer_mbigen(v)	(&(v)->arch.timer_cpu.mbigen_ctx)
+
 #define arch_timer_ctx_index(ctx)	((ctx) - vcpu_timer((ctx)->vcpu)->timers)
 #define timer_vm_data(ctx)		(&(ctx)->vcpu->kvm->arch.timer_data)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 5b27f94d4fad6a5bc16d7f5fb737435dc0972bd2..a690527f041d04d9ff21e1324553b44c7301f1d3 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -35,6 +35,31 @@
 #define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \
 			 (irq) <= VGIC_MAX_SPI)
 
+struct shadow_dev {
+	struct kvm		*kvm;
+	struct list_head	entry;
+
+	u32			devid;	/* guest visible device id */
+	u32			nvecs;
+	unsigned long		*enable;
+	int			*host_irq;
+	struct kvm_msi		*msi;
+
+	struct platform_device	*pdev;
+
+	struct work_struct	destroy;
+};
+
+/* Information about the HiSilicon implementation of vtimer (GICv4.1-based) */
+struct vtimer_info {
+	u32	intid;
+
+	bool (*get_active_stat)(struct kvm_vcpu *vcpu, int vintid);
+	void (*set_active_stat)(struct kvm_vcpu *vcpu, int vintid, bool active);
+};
+
+u16 kvm_vgic_get_vcpu_vpeid(struct kvm_vcpu *vcpu);
+
 enum vgic_type {
 	VGIC_V2,		/* Good ol' GICv2 */
 	VGIC_V3,		/* New fancy GICv3 */
@@ -77,6 +102,12 @@ struct vgic_global {
 	/* Pseudo GICv3 from outer space */
 	bool			no_hw_deactivation;
 
+	/*
+	 * Does the hardware (HiSilicon implementation) support vtimer
+	 * interrupt direct injection?
+	 */
+	bool			has_direct_vtimer;
+
 	/* GIC system register CPU interface */
 	struct static_key_false gicv3_cpuif;
 
@@ -155,6 +186,8 @@ struct vgic_irq {
 	void *owner;			/* Opaque pointer to reserve an
 					 * interrupt for in-kernel devices.
 					 */
+
+	struct vtimer_info *vtimer_info; /* vtimer interrupt only */
 };
 
 static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq)
@@ -258,6 +291,9 @@ struct vgic_dist {
 	/* Wants SGIs without active state */
 	bool			nassgireq;
 
+	/* Indicates whether the vtimer irqbypass mode is used */
+	bool			vtimer_irqbypass;
+
 	struct vgic_irq		*spis;
 
 	struct vgic_io_device	dist_iodev;
@@ -292,6 +328,9 @@ struct vgic_dist {
 	 * else.
 	 */
 	struct its_vm		its_vm;
+
+	raw_spinlock_t		sdev_list_lock;
+	struct list_head	sdev_list_head;
 };
 
 struct vgic_v2_cpu_if {
@@ -331,6 +370,8 @@ struct vgic_cpu {
 
 	struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS];
 
+	struct vtimer_info vtimer;
+
 	raw_spinlock_t ap_list_lock;	/* Protects the ap_list */
 
 	/*
@@ -402,6 +443,14 @@ void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
 
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);
 
+/**
+ * kvm_vgic_vtimer_irqbypass_support - Get the vtimer irqbypass HW capability
+ */
+static inline bool kvm_vgic_vtimer_irqbypass_support(void)
+{
+	return kvm_vgic_global_state.has_direct_vtimer;
+}
+
 /**
  * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
  *
@@ -432,9 +481,22 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
 
 int vgic_v4_load(struct kvm_vcpu *vcpu);
 void vgic_v4_commit(struct kvm_vcpu *vcpu);
 int vgic_v4_put(struct kvm_vcpu *vcpu);
+int kvm_vgic_config_vtimer_irqbypass(struct kvm_vcpu *vcpu, u32 vintid,
+				     bool (*get_as)(struct kvm_vcpu *, int),
+				     void (*set_as)(struct kvm_vcpu *, int, bool));
 
 /* CPU HP callbacks */
 void kvm_vgic_cpu_up(void);
 void kvm_vgic_cpu_down(void);
 
+extern bool sdev_enable;
+
+void kvm_shadow_dev_init(void);
+int kvm_shadow_dev_create(struct kvm *kvm, struct kvm_master_dev_info *mdi);
+void kvm_shadow_dev_delete(struct kvm *kvm, u32 devid);
+void kvm_shadow_dev_delete_all(struct kvm *kvm);
+struct shadow_dev *kvm_shadow_dev_get(struct kvm *kvm, struct kvm_msi *msi);
+
+int shadow_dev_virq_bypass_inject(struct kvm *kvm,
+				  struct kvm_kernel_irq_routing_entry *e);
+
 #endif /* __KVM_ARM_VGIC_H */
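To make the contract of kvm_vgic_config_vtimer_irqbypass() concrete, here is a sketch of how the arch timer side might register the vtimer PPI with mbigen-backed active-state accessors (not the series' actual code; the callback bodies and names are invented, and they lean on the vtimer_mbigen_* helpers declared earlier in this patch):

/* Sketch: wiring a vtimer PPI to mbigen-backed active-state accessors */
static bool example_get_active(struct kvm_vcpu *vcpu, int vintid)
{
	return vtimer_mbigen_get_active(vcpu->cpu);
}

static void example_set_active(struct kvm_vcpu *vcpu, int vintid, bool active)
{
	vtimer_mbigen_set_active(vcpu->cpu, active);
}

static int example_config(struct kvm_vcpu *vcpu, u32 vtimer_intid)
{
	return kvm_vgic_config_vtimer_irqbypass(vcpu, vtimer_intid,
						example_get_active,
						example_set_active);
}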
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 728691365464c1585b23338184c95ba90f1b65c1..8df894c12d2f8b2737bac8d67778471f1ea34c30 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -380,9 +380,15 @@
 #define GITS_TRANSLATER			0x10040
 
 #define GITS_SGIR			0x20020
+/* HiSilicon IMP DEF register to set vPPI pending. */
+#define GITS_PPIR			0x200A8
+
+/* HiSilicon IMP DEF register */
+#define GITS_VERSION			0xC000
 
 #define GITS_SGIR_VPEID			GENMASK_ULL(47, 32)
-#define GITS_SGIR_VINTID		GENMASK_ULL(3, 0)
+/* Hackish... Extend it to [4:0] to support vPPI. */
+#define GITS_SGIR_VINTID		GENMASK_ULL(4, 0)
 
 #define GITS_CTLR_ENABLE		(1U << 0)
 #define GITS_CTLR_ImDe			(1U << 1)
@@ -404,6 +410,14 @@
 #define GITS_TYPER_VMAPP		(1ULL << 40)
 #define GITS_TYPER_SVPET		GENMASK_ULL(42, 41)
 
+/*
+ * HiSilicon IMP DEF field which indicates whether vPPI direct injection
+ * is supported:
+ * - 0: not supported
+ * - 1: supported
+ */
+#define GITS_VERSION_VTIMER		(1ULL << 12)
+
 #define GITS_IIDR_REV_SHIFT		12
 #define GITS_IIDR_REV_MASK		(0xf << GITS_IIDR_REV_SHIFT)
 #define GITS_IIDR_REV(r)		(((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
@@ -631,6 +645,7 @@ struct rdists {
 	bool			has_rvpeid;
 	bool			has_direct_lpi;
 	bool			has_vpend_valid_dirty;
+	bool			has_vtimer;
 };
 
 struct irq_domain;
@@ -641,6 +656,8 @@ int its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	     struct irq_domain *domain);
 int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent);
 
+phys_addr_t get_gicr_paddr(int cpu);
+
 static inline bool gic_enable_sre(void)
 {
 	u32 val;
@@ -656,6 +673,36 @@ static inline bool gic_enable_sre(void)
 	return !!(val & ICC_SRE_EL1_SRE);
 }
 
+enum gic_intid_range {
+	SGI_RANGE,
+	PPI_RANGE,
+	SPI_RANGE,
+	EPPI_RANGE,
+	ESPI_RANGE,
+	LPI_RANGE,
+	__INVALID_RANGE__
+};
+
+static inline enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq)
+{
+	switch (hwirq) {
+	case 0 ... 15:
+		return SGI_RANGE;
+	case 16 ... 31:
+		return PPI_RANGE;
+	case 32 ... 1019:
+		return SPI_RANGE;
+	case EPPI_BASE_INTID ... (EPPI_BASE_INTID + 63):
+		return EPPI_RANGE;
+	case ESPI_BASE_INTID ... (ESPI_BASE_INTID + 1023):
+		return ESPI_RANGE;
+	case 8192 ... GENMASK(23, 0):
+		return LPI_RANGE;
+	default:
+		return __INVALID_RANGE__;
+	}
+}
+
 #endif
 
 #endif
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index 2c63375bbd43f41d3c5019ab4f4f3583196b522f..20b06f4ff7d6f9e639b06b3ea08f6c4bdb1ede5e 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -57,7 +57,8 @@ struct its_vpe {
 			u8	priority;
 			bool	enabled;
 			bool	group;
-		} sgi_config[16];
+		} sgi_config[32];
+		int	nr_irqs;
 		atomic_t vmapp_count;
 	};
 };
@@ -143,7 +144,8 @@ int its_prop_update_vsgi(int irq, u8 priority, bool group);
 struct irq_domain_ops;
 int its_init_v4(struct irq_domain *domain,
 		const struct irq_domain_ops *vpe_ops,
-		const struct irq_domain_ops *sgi_ops);
+		const struct irq_domain_ops *sgi_ops,
+		bool has_vtimer_irqbypass);
 
 bool gic_cpuif_has_vsgi(void);
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
index a75b2c7de69d09262946ab4aade39b46c5be6098..8dd917837a178ba53969ff3f17f71d9163678551 100644
--- a/include/linux/irqchip/arm-vgic-info.h
+++ b/include/linux/irqchip/arm-vgic-info.h
@@ -32,6 +32,8 @@ struct gic_kvm_info {
 	bool		has_v4;
 	/* rvpeid support */
 	bool		has_v4_1;
+	/* vtimer irqbypass support */
+	bool		has_vtimer;
 	/* Deactivation impared, subpar stuff */
 	bool		no_hw_deactivation;
 };
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fb6c6109fdcad69f81cd38edf52dce7dc3d7a4e9..a5d156dabbf73575de8ccc15a8ea851b0c7749a0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -631,6 +631,11 @@ struct kvm_xen_evtchn {
 	u32 priority;
 };
 
+struct kire_data {
+	bool	valid;
+	void	*data;
+};
+
 struct kvm_kernel_irq_routing_entry {
 	u32 gsi;
 	u32 type;
@@ -654,6 +659,8 @@ struct kvm_kernel_irq_routing_entry {
 		struct kvm_xen_evtchn xen_evtchn;
 	};
 	struct hlist_node link;
+
+	struct kire_data cache;
 };
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
@@ -1640,6 +1647,8 @@ int kvm_request_irq_source_id(struct kvm *kvm);
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 
 bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
+void kire_arch_cached_data_update(struct kvm *kvm,
+				  struct kvm_kernel_irq_routing_entry *e);
 
 /*
  * Returns a pointer to the memslot if it contains gfn.
  * Otherwise returns NULL.
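The weak default for kire_arch_cached_data_update() appears at the end of this series (virt/kvm/eventfd.c below); an architecture wanting the routing-entry cache populated would override it. A sketch of what an arm64 override might look like (the body is invented and shown only to illustrate the hook's contract of filling e->cache; the series' real implementation may differ):

/* Sketch: a possible arch-side override of the weak hook */
void kire_arch_cached_data_update(struct kvm *kvm,
				  struct kvm_kernel_irq_routing_entry *e)
{
	struct kvm_msi msi;

	/* Only MSI routes are interesting for shadow-device bypass. */
	if (e->type != KVM_IRQ_ROUTING_MSI) {
		e->cache.valid = false;
		return;
	}

	msi.address_lo = e->msi.address_lo;
	msi.address_hi = e->msi.address_hi;
	msi.data       = e->msi.data;

	/* Hypothetical lookup: resolve the shadow device backing this route. */
	e->cache.data  = kvm_shadow_dev_get(kvm, &msi);
	e->cache.valid = e->cache.data != NULL;
}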
diff --git a/include/linux/msi.h b/include/linux/msi.h
index ddace8c34dcf958edae65de2858bf924adb9d19e..865dfaf88c98c4f66d6a4d1d2f54cb5bbc534d0f 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -684,6 +684,8 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
 					     struct irq_domain *parent);
 u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev);
 struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
+
+struct irq_domain *vp_get_irq_domain(void);
 #else /* CONFIG_PCI_MSI */
 static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
 {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 13065dd96132da65beb99f9455659c9b75ed109d..d04585bb9937f7bfb4147307959ec9e94a6a9f05 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1193,6 +1193,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
 
+#define KVM_CAP_ARM_VIRT_MSI_BYPASS 799
+
 #ifdef KVM_CAP_IRQ_ROUTING
 
 struct kvm_irq_routing_irqchip {
@@ -1461,6 +1463,11 @@ struct kvm_vfio_spapr_tce {
 	__s32	tablefd;
 };
 
+struct kvm_master_dev_info {
+	__u32 nvectors;
+	struct kvm_msi msi[];
+};
+
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
@@ -1571,6 +1578,9 @@ struct kvm_s390_ucas_mapping {
 #define KVM_GET_DEVICE_ATTR	  _IOW(KVMIO,  0xe2, struct kvm_device_attr)
 #define KVM_HAS_DEVICE_ATTR	  _IOW(KVMIO,  0xe3, struct kvm_device_attr)
 
+#define KVM_CREATE_SHADOW_DEV	  _IOW(KVMIO,  0xf0, struct kvm_master_dev_info)
+#define KVM_DEL_SHADOW_DEV	  _IOW(KVMIO,  0xf1, __u32)
+
 /*
  * ioctls for vcpu fds
  */
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 89912a17f5d576da3a06d0020ff1ed2e2c6bee3d..ae8407ecce9ce7d8b63985d9ae5f72e926a6c7fd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -38,6 +38,12 @@ kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
 	return true;
 }
 
+void __attribute__((weak))
+kire_arch_cached_data_update(struct kvm *kvm,
+			     struct kvm_kernel_irq_routing_entry *e)
+{
+}
+
 static void
 irqfd_inject(struct work_struct *work)
 {
@@ -270,6 +276,8 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 	else
 		irqfd->irq_entry.type = 0;
 
+	kire_arch_cached_data_update(kvm, &irqfd->irq_entry);
+
 	write_seqcount_end(&irqfd->irq_entry_sc);
 }
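Finally, a userspace-side sketch of how the new VM ioctls are presumably meant to be driven (illustrative only; the expected caller is the VMM, error handling is elided, and the devid value is made up):

/* Sketch: creating a shadow device for a 4-vector guest MSI device */
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_shadow_dev(int vm_fd)
{
	unsigned int i, nvec = 4;
	struct kvm_master_dev_info *mdi;
	int ret;

	mdi = calloc(1, sizeof(*mdi) + nvec * sizeof(struct kvm_msi));
	if (!mdi)
		return -1;

	mdi->nvectors = nvec;
	for (i = 0; i < nvec; i++) {
		mdi->msi[i].devid = 0x10000;		/* hypothetical guest devid */
		mdi->msi[i].data  = i;			/* vector index             */
		mdi->msi[i].flags = KVM_MSI_VALID_DEVID;
	}

	ret = ioctl(vm_fd, KVM_CREATE_SHADOW_DEV, mdi);
	free(mdi);
	return ret;
}

Deleting the device again would be a single ioctl(vm_fd, KVM_DEL_SHADOW_DEV, &devid), matching the __u32 argument declared above.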