diff --git a/ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp_.patch b/ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp.patch similarity index 69% rename from ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp_.patch rename to ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp.patch index 5727a1d482a732c900701024507156a0f999bf40..3012512988402756527c95341aa921a362c9e43e 100644 --- a/ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp_.patch +++ b/ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp.patch @@ -1,8 +1,11 @@ -From f3c1fc4dce1582ccc1754899d5149d233e8629ff Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Thu, 3 Oct 2019 17:46:40 +0200 -Subject: [PATCH 3/3] ARM: KVM: Check KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 for - smp_cpus > 256 +From e3a7ec839fa4f823666d726989c375dcf73348a4 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 15 Apr 2020 16:14:50 +0800 +Subject: [PATCH] ARM: KVM: Check KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 for smp_cpus > + 256 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit Host kernel within [4.18, 5.3] report an erroneous KVM_MAX_VCPUS=512 for ARM. The actual capability to instantiate more than 256 vcpus @@ -21,29 +24,30 @@ Message-id: 20191003154640.22451-4-eric.auger@redhat.com Signed-off-by: Peter Maydell (cherry-picked from commit fff9f5558d0e0813d4f80bfe1602acf225eca4fd) [yu: Use the legacy smp_cpus instead of ms->smp.cpus, as we don't have - struct CpuTopology in MachineState at that time. See commit - edeeec911702 for details.] + ¦struct CpuTopology in MachineState at that time. See commit + ¦edeeec911702 for details.] 
Signed-off-by: Zenghui Yu --- - target/arm/kvm.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) + target/arm/kvm.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/target/arm/kvm.c b/target/arm/kvm.c -index f60185ad..383423c4 100644 +index 50e86f8b..cc7a46df 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c -@@ -174,6 +174,7 @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms) - +@@ -173,6 +173,8 @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms) + int kvm_arch_init(MachineState *ms, KVMState *s) { + int ret = 0; ++ unsigned int smp_cpus = ms->smp.cpus; /* For ARM interrupt delivery is always asynchronous, * whether we are using an in-kernel VGIC or not. */ -@@ -187,7 +188,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s) - +@@ -186,7 +188,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); - + - return 0; + if (smp_cpus > 256 && + !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { @@ -54,8 +58,7 @@ index f60185ad..383423c4 100644 + + return ret; } - - unsigned long kvm_arch_vcpu_id(CPUState *cpu) --- -2.19.1 + unsigned long kvm_arch_vcpu_id(CPUState *cpu) +-- +2.23.0 diff --git a/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch b/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch index b2ad176f8d56fdc71b6663f36bad7cc93ed4429a..4681e9f33f6877f199768b7a06a65df5356e6a7f 100644 --- a/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch +++ b/ARM64-record-vtimer-tick-when-cpu-is-stopped.patch @@ -1,6 +1,6 @@ -From d2fd6d1a5200b9a58863839d21d291cd4f76ac31 Mon Sep 17 00:00:00 2001 +From 4646a24045cf53f2cc5e0ef1974da88ef50ef676 Mon Sep 17 00:00:00 2001 From: Ying Fang -Date: Mon, 29 Jul 2019 15:47:27 +0800 +Date: Wed, 27 May 2020 11:54:31 +0800 Subject: [PATCH] ARM64: record vtimer tick when cpu is stopped The vtimer kick still increases even if the vcpu is stopped when VM has @@ -17,10 +17,10 @@ Signed-off-by: Ying Fang 3 files changed, 61 
insertions(+) diff --git a/cpus.c b/cpus.c -index e83f72b4..f6ec48a2 100644 +index 927a00aa..b9aa51f8 100644 --- a/cpus.c +++ b/cpus.c -@@ -1063,6 +1063,28 @@ void cpu_synchronize_all_pre_loadvm(void) +@@ -1066,6 +1066,28 @@ void cpu_synchronize_all_pre_loadvm(void) } } @@ -49,7 +49,7 @@ index e83f72b4..f6ec48a2 100644 static int do_vm_stop(RunState state, bool send_stop) { int ret = 0; -@@ -1070,6 +1092,11 @@ static int do_vm_stop(RunState state, bool send_stop) +@@ -1073,6 +1095,11 @@ static int do_vm_stop(RunState state, bool send_stop) if (runstate_is_running()) { cpu_disable_ticks(); pause_all_vcpus(); @@ -61,7 +61,7 @@ index e83f72b4..f6ec48a2 100644 runstate_set(state); vm_state_notify(0, state); if (send_stop) { -@@ -1909,11 +1936,42 @@ void cpu_resume(CPUState *cpu) +@@ -1918,11 +1945,42 @@ void cpu_resume(CPUState *cpu) qemu_cpu_kick(cpu); } @@ -105,23 +105,23 @@ index e83f72b4..f6ec48a2 100644 cpu_resume(cpu); } diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index d4d28369..e107e395 100644 +index 86eb79cd..aec6a214 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h -@@ -270,6 +270,8 @@ typedef struct CPUARMState { - uint64_t elr_el[4]; /* AArch64 exception link regs */ +@@ -262,6 +262,8 @@ typedef struct CPUARMState { uint64_t sp_el[4]; /* AArch64 banked stack pointers */ + + uint64_t vtimer; /* Timer tick when vcpu stop */ + /* System control coprocessor (cp15) */ struct { uint32_t c0_cpuid; diff --git a/target/arm/machine.c b/target/arm/machine.c -index b2925496..d64a0057 100644 +index ee3c59a6..ec28b839 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c -@@ -792,6 +792,7 @@ const VMStateDescription vmstate_arm_cpu = { +@@ -814,6 +814,7 @@ const VMStateDescription vmstate_arm_cpu = { VMSTATE_UINT32(env.exception.syndrome, ARMCPU), VMSTATE_UINT32(env.exception.fsr, ARMCPU), VMSTATE_UINT64(env.exception.vaddress, ARMCPU), diff --git a/Add-mtod_check.patch b/Add-mtod_check.patch new file mode 100644 index 
0000000000000000000000000000000000000000..7309bc4342c71e6bf41d7835f589826e203e8a82 --- /dev/null +++ b/Add-mtod_check.patch @@ -0,0 +1,54 @@ +From 8cb4d202d4e5713e9b2b5f0ec817234941623f10 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 4 Jun 2021 15:58:25 +0400 +Subject: [PATCH 1/6] Add mtod_check() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Recent security issues demonstrate the lack of safety care when casting +a mbuf to a particular structure type. At least, it should check that +the buffer is large enough. The following patches will make use of this +function. + +Signed-off-by: Marc-André Lureau +Signed-off-by: imxcc +--- + slirp/src/mbuf.c | 11 +++++++++++ + slirp/src/mbuf.h | 1 + + 2 files changed, 12 insertions(+) + +diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c +index 4fd62282..6d0653ed 100644 +--- a/slirp/src/mbuf.c ++++ b/slirp/src/mbuf.c +@@ -222,3 +222,14 @@ struct mbuf *dtom(Slirp *slirp, void *dat) + + return (struct mbuf *)0; + } ++ ++void *mtod_check(struct mbuf *m, size_t len) ++{ ++ if (m->m_len >= len) { ++ return m->m_data; ++ } ++ ++ DEBUG_ERROR("mtod failed"); ++ ++ return NULL; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +index 546e7852..2015e323 100644 +--- a/slirp/src/mbuf.h ++++ b/slirp/src/mbuf.h +@@ -118,6 +118,7 @@ void m_inc(struct mbuf *, int); + void m_adj(struct mbuf *, int); + int m_copy(struct mbuf *, struct mbuf *, int, int); + struct mbuf *dtom(Slirp *, void *); ++void *mtod_check(struct mbuf *, size_t len); + + static inline void ifs_init(struct mbuf *ifm) + { +-- +2.27.0 + diff --git a/Bugfix-hw-acpi-Use-max_cpus-instead-of-cpus-when-bui.patch b/Bugfix-hw-acpi-Use-max_cpus-instead-of-cpus-when-bui.patch new file mode 100644 index 0000000000000000000000000000000000000000..8d492058d77ba70d51c88bffcbef7a1ae7b28adb --- /dev/null +++ b/Bugfix-hw-acpi-Use-max_cpus-instead-of-cpus-when-bui.patch @@ -0,0 +1,32 @@ +From 
38734e26ce3840d459da13607a9d46de24a15388 Mon Sep 17 00:00:00 2001 +From: kevinZhu +Date: Thu, 29 Oct 2020 19:24:48 +0800 +Subject: [PATCH] Bugfix: hw/acpi: Use max_cpus instead of cpus when build PPTT + table + +The field "cpus" is the initial number of CPU for guest, and the field "max_cpus" +is the max number of CPU after CPU hotplug. When building PPTT for guest, we +should take all CPUs into account, otherwise the "smp_sockets" is wrong. + +Fixes: 7cfcd8c8a2fe ("build smt processor structure to support smt topology") +Signed-off-by: Keqian Zhu +--- + hw/acpi/aml-build.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 8a3b51c835..f01669df57 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -167,7 +167,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) + struct offset_status offset; + const MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int smp_cores = ms->smp.cores; +- unsigned int smp_sockets = ms->smp.cpus / (smp_cores * ms->smp.threads); ++ unsigned int smp_sockets = ms->smp.max_cpus / (smp_cores * ms->smp.threads); + + acpi_data_push(table_data, sizeof(AcpiTableHeader)); + +-- +2.27.0 + diff --git a/Drop-bogus-IPv6-messages.patch b/Drop-bogus-IPv6-messages.patch new file mode 100644 index 0000000000000000000000000000000000000000..2fc1e0e780e34b1570fbcfcc4581138c79e7fa46 --- /dev/null +++ b/Drop-bogus-IPv6-messages.patch @@ -0,0 +1,30 @@ +From e8b555c08061ad78920611a5e98ee14fcd967692 Mon Sep 17 00:00:00 2001 +From: Ralf Haferkamp +Date: Fri, 11 Sep 2020 10:55:49 +0800 +Subject: [PATCH] Drop bogus IPv6 messages + +Drop IPv6 message shorter than what's mentioned in the payload +length header (+the size of IPv6 header). They're invalid and could +lead to data leakage in icmp6_send_echoreply(). 
+ +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +index d9d2b7e..c2dce52 100644 +--- a/slirp/src/ip6_input.c ++++ b/slirp/src/ip6_input.c +@@ -49,6 +49,13 @@ void ip6_input(struct mbuf *m) + goto bad; + } + ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ + /* check ip_ttl for a correct ICMP reply */ + if (ip6->ip_hl == 0) { + icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); +-- +1.8.3.1 + diff --git a/Fix-use-after-free-in-vfio_migration_probe.patch b/Fix-use-after-free-in-vfio_migration_probe.patch new file mode 100644 index 0000000000000000000000000000000000000000..f0a94e60054da414102dbda43f9d111c4bc2e6d9 --- /dev/null +++ b/Fix-use-after-free-in-vfio_migration_probe.patch @@ -0,0 +1,39 @@ +From 126fc13ebe9c5e58a5b1daeb4e102e6fa5845779 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Fri, 6 Nov 2020 23:32:24 +0530 +Subject: [PATCH] Fix use after free in vfio_migration_probe +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fixes Coverity issue: +CID 1436126: Memory - illegal accesses (USE_AFTER_FREE) + +Fixes: a9e271ec9b36 ("vfio: Add migration region initialization and finalize function") +Signed-off-by: Kirti Wankhede +Reviewed-by: David Edmondson +Reviewed-by: Alex Bennée +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/migration.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 1a97784486..8546075706 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -903,8 +903,8 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + goto add_blocker; + } + +- g_free(info); + trace_vfio_migration_probe(vbasedev->name, info->index); ++ g_free(info); + return 0; + + 
add_blocker: +-- +2.27.0 + diff --git a/Fixed-integer-overflow-in-e1000e.patch b/Fixed-integer-overflow-in-e1000e.patch new file mode 100644 index 0000000000000000000000000000000000000000..004390fc5a3d60d2aaf4912d0679c4fa471d28a2 --- /dev/null +++ b/Fixed-integer-overflow-in-e1000e.patch @@ -0,0 +1,40 @@ +From 41077af2c4283c15c0a822017ea51612d15b68f8 Mon Sep 17 00:00:00 2001 +From: Andrew Melnychenko +Date: Wed, 4 Mar 2020 16:20:58 +0200 +Subject: [PATCH 1/5] Fixed integer overflow in e1000e +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1737400 +Fixed setting max_queue_num if there are no peers in +NICConf. qemu_new_nic() creates NICState with 1 NetClientState(index +0) without peers, set max_queue_num to 0 - It prevents undefined +behavior and possible crashes, especially during pcie hotplug. + +Fixes: 6f3fbe4ed06 ("net: Introduce e1000e device emulation") +Signed-off-by: Andrew Melnychenko +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Dmitry Fleytman +Signed-off-by: Jason Wang +Signed-off-by: Zhenyu Ye +--- + hw/net/e1000e.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index 581f7d03..1e827c4f 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -325,7 +325,7 @@ e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr) + s->nic = qemu_new_nic(&net_e1000e_info, &s->conf, + object_get_typename(OBJECT(s)), dev->id, s); + +- s->core.max_queue_num = s->conf.peers.queues - 1; ++ s->core.max_queue_num = s->conf.peers.queues ? 
s->conf.peers.queues - 1 : 0; + + trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); + memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); +-- +2.22.0.windows.1 + diff --git a/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch b/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch new file mode 100644 index 0000000000000000000000000000000000000000..200e0b2df02607b11e3117863afd00a346419e27 --- /dev/null +++ b/Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch @@ -0,0 +1,88 @@ +From 73a5bf472921068e6db10e7e325b7ac46f111834 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 18:36:05 -0400 +Subject: [PATCH] Revert "ide/ahci: Check for -ECANCELED in aio callbacks" + +This reverts commit 0d910cfeaf2076b116b4517166d5deb0fea76394. + +It's not correct to just ignore an error code in a callback; we need to +handle that error and possible report failure to the guest so that they +don't wait indefinitely for an operation that will now never finish. + +This ought to help cases reported by Nutanix where iSCSI returns a +legitimate -ECANCELED for certain operations which should be propagated +normally. 
+ +Reported-by: Shaju Abraham +Signed-off-by: John Snow +Message-id: 20190729223605.7163-1-jsnow@redhat.com +Signed-off-by: John Snow +(cherry picked from commit 8ec41c4265714255d5a138f8b538faf3583dcff6) +Signed-off-by: Michael Roth +--- + hw/ide/ahci.c | 3 --- + hw/ide/core.c | 14 -------------- + 2 files changed, 17 deletions(-) + +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index 00ba422a48..6aaf66534a 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1023,9 +1023,6 @@ static void ncq_cb(void *opaque, int ret) + IDEState *ide_state = &ncq_tfs->drive->port.ifs[0]; + + ncq_tfs->aiocb = NULL; +- if (ret == -ECANCELED) { +- return; +- } + + if (ret < 0) { + bool is_read = ncq_tfs->cmd == READ_FPDMA_QUEUED; +diff --git a/hw/ide/core.c b/hw/ide/core.c +index 6afadf894f..8e1624f7ce 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -722,9 +722,6 @@ static void ide_sector_read_cb(void *opaque, int ret) + s->pio_aiocb = NULL; + s->status &= ~BUSY_STAT; + +- if (ret == -ECANCELED) { +- return; +- } + if (ret != 0) { + if (ide_handle_rw_error(s, -ret, IDE_RETRY_PIO | + IDE_RETRY_READ)) { +@@ -840,10 +837,6 @@ static void ide_dma_cb(void *opaque, int ret) + uint64_t offset; + bool stay_active = false; + +- if (ret == -ECANCELED) { +- return; +- } +- + if (ret == -EINVAL) { + ide_dma_error(s); + return; +@@ -975,10 +968,6 @@ static void ide_sector_write_cb(void *opaque, int ret) + IDEState *s = opaque; + int n; + +- if (ret == -ECANCELED) { +- return; +- } +- + s->pio_aiocb = NULL; + s->status &= ~BUSY_STAT; + +@@ -1058,9 +1047,6 @@ static void ide_flush_cb(void *opaque, int ret) + + s->pio_aiocb = NULL; + +- if (ret == -ECANCELED) { +- return; +- } + if (ret < 0) { + /* XXX: What sector number to set here? 
*/ + if (ide_handle_rw_error(s, -ret, IDE_RETRY_FLUSH)) { +-- +2.23.0 diff --git a/Revert-vtimer-compat-cross-version-migration-from-v4.patch b/Revert-vtimer-compat-cross-version-migration-from-v4.patch new file mode 100644 index 0000000000000000000000000000000000000000..082f1763f9b445ba3816e2051c47129a6c64528a --- /dev/null +++ b/Revert-vtimer-compat-cross-version-migration-from-v4.patch @@ -0,0 +1,37 @@ +From ced290d644a00e18e70046194d042bcaa2703b65 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 27 May 2020 11:16:53 +0800 +Subject: [PATCH] Revert: "vtimer: compat cross version migration from v4.0.1" + +This reverts commit patch: +vtimer-compat-cross-version-migration-from-v4.0.1.patch + +Signed-off-by: Ying Fang + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 2609113d..86eb79cd 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -261,7 +261,6 @@ typedef struct CPUARMState { + uint64_t elr_el[4]; /* AArch64 exception link regs */ + uint64_t sp_el[4]; /* AArch64 banked stack pointers */ + +- uint64_t vtimer; /* Timer tick when vcpu is stopped */ + + /* System control coprocessor (cp15) */ + struct { +diff --git a/target/arm/machine.c b/target/arm/machine.c +index ec28b839..ee3c59a6 100644 +--- a/target/arm/machine.c ++++ b/target/arm/machine.c +@@ -814,7 +814,6 @@ const VMStateDescription vmstate_arm_cpu = { + VMSTATE_UINT32(env.exception.syndrome, ARMCPU), + VMSTATE_UINT32(env.exception.fsr, ARMCPU), + VMSTATE_UINT64(env.exception.vaddress, ARMCPU), +- VMSTATE_UINT64(env.vtimer, ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), + { +-- +2.23.0 + diff --git a/Typo-Correct-the-name-of-CPU-hotplug-memory-region.patch b/Typo-Correct-the-name-of-CPU-hotplug-memory-region.patch new file mode 100644 index 0000000000000000000000000000000000000000..bc1fd44d163beb57f08c3b58918d305772b0c362 --- /dev/null +++ b/Typo-Correct-the-name-of-CPU-hotplug-memory-region.patch @@ -0,0 +1,27 @@ +From 
843f593280b93e03bb7b0d0001da7488d61f13f6 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 6 Apr 2020 08:55:17 +0800 +Subject: [PATCH] Typo: Correct the name of CPU hotplug memory region + +Replace "acpi-mem-hotplug" with "acpi-cpu-hotplug" + +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 7a90c8f82d..0c0bfe479a 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -203,7 +203,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, +- "acpi-mem-hotplug", ACPI_CPU_HOTPLUG_REG_LEN); ++ "acpi-cpu-hotplug", ACPI_CPU_HOTPLUG_REG_LEN); + memory_region_add_subregion(as, base_addr, &state->ctrl_reg); + } + +-- +2.19.1 diff --git a/accel-kvm-Add-pre-park-vCPU-support.patch b/accel-kvm-Add-pre-park-vCPU-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..9bc81178581c30504ca1b32d1f47ba8cbc1a5b85 --- /dev/null +++ b/accel-kvm-Add-pre-park-vCPU-support.patch @@ -0,0 +1,63 @@ +From 135119d2e82e99adc67346572c761fbe54d73e4a Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 13:04:40 +0800 +Subject: [PATCH] accel/kvm: Add pre-park vCPU support + +For that KVM do not support dynamic adjustment of vCPU count, +we must pre-park all possible vCPU at start. 
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + accel/kvm/kvm-all.c | 23 +++++++++++++++++++++++ + include/sysemu/kvm.h | 1 + + 2 files changed, 24 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index f450f25295..84edbe8bb1 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -339,6 +339,29 @@ err: + return ret; + } + ++int kvm_create_parked_vcpu(unsigned long vcpu_id) ++{ ++ KVMState *s = kvm_state; ++ struct KVMParkedVcpu *vcpu = NULL; ++ int ret; ++ ++ DPRINTF("kvm_create_parked_vcpu\n"); ++ ++ ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); ++ if (ret < 0) { ++ DPRINTF("kvm_create_vcpu failed\n"); ++ goto err; ++ } ++ ++ vcpu = g_malloc0(sizeof(*vcpu)); ++ vcpu->vcpu_id = vcpu_id; ++ vcpu->kvm_fd = ret; ++ QLIST_INSERT_HEAD(&s->kvm_parked_vcpus, vcpu, node); ++ ++err: ++ return ret; ++} ++ + static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) + { + struct KVMParkedVcpu *cpu; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index acd90aebb6..565adb4e2c 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -216,6 +216,7 @@ int kvm_has_many_ioeventfds(void); + int kvm_has_gsi_routing(void); + int kvm_has_intx_set_mask(void); + ++int kvm_create_parked_vcpu(unsigned long vcpu_id); + int kvm_init_vcpu(CPUState *cpu); + int kvm_cpu_exec(CPUState *cpu); + int kvm_destroy_vcpu(CPUState *cpu); +-- +2.19.1 diff --git a/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch b/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..daff52e901686e17c1c492e899165b773db96258 --- /dev/null +++ b/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch @@ -0,0 +1,45 @@ +From b50b9a0e2e5e8262c830df5994f3abbe0a37655a Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Thu, 17 Dec 2020 09:49:40 +0800 +Subject: [PATCH] accel: kvm: Fix memory waste under mismatch page size + +When handle dirty log, we face 
qemu_real_host_page_size and +TARGET_PAGE_SIZE. The first one is the granule of KVM dirty +bitmap, and the second one is the granule of QEMU dirty bitmap. + +As qemu_real_host_page_size >= TARGET_PAGE_SIZE (kvm_init() +enforced it), misuse TARGET_PAGE_SIZE to init kvmslot dirty_bmap +may waste memory. For example, when qemu_real_host_page_size is +64K and TARGET_PAGE_SIZE is 4K, it wastes 93.75% (15/16) memory. + +Signed-off-by: Keqian Zhu +Reviewed-by: Andrew Jones +Reviewed-by: Peter Xu +Message-Id: <20201217014941.22872-2-zhukeqian1@huawei.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5a6b89cc2a..4daff563a0 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -551,8 +551,12 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) + * too, in most cases). + * So for now, let's align to 64 instead of HOST_LONG_BITS here, in + * a hope that sizeof(long) won't become >8 any time soon. ++ * ++ * Note: the granule of kvm dirty log is qemu_real_host_page_size. ++ * And mem->memory_size is aligned to it (otherwise this mem can't ++ * be registered to KVM). 
+ */ +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ hwaddr bitmap_size = ALIGN(mem->memory_size / qemu_real_host_page_size, + /*HOST_LONG_BITS*/ 64) / 8; + mem->dirty_bmap = g_malloc0(bitmap_size); + } +-- +2.27.0 + diff --git a/acpi-cpu-Prepare-build_cpus_aml-for-arm-virt.patch b/acpi-cpu-Prepare-build_cpus_aml-for-arm-virt.patch new file mode 100644 index 0000000000000000000000000000000000000000..0506d1b73c1586699dd0fe92254baa5d0af26a7b --- /dev/null +++ b/acpi-cpu-Prepare-build_cpus_aml-for-arm-virt.patch @@ -0,0 +1,128 @@ +From 107c267ebe5b8c461268a4ff8384ad2f2b9e8ce0 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Wed, 22 Apr 2020 16:11:13 +0800 +Subject: [PATCH] acpi/cpu: Prepare build_cpus_aml for arm virt + +We will reuse build_cpus_aml to build DSDT cpus aml in arm/virt +ACPI to realize cpu hotplug. Three points are added. + +1. Make ACPI IO address space configurable, because ARM64 platforms + don't use port IO for ACPI IO space. +2. Add GICC struct building support in _MAT of cpu aml. +3. Let the hotplug method parameter can be NULL, because ACPI GED + will realize it. + +Besides, CPU CPPC building is injected. 
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 32 +++++++++++++++++++++++++------- + hw/i386/acpi-build.c | 2 +- + include/hw/acpi/cpu.h | 3 ++- + 3 files changed, 28 insertions(+), 9 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 0c0bfe479a..72ad1fcff2 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -314,7 +314,8 @@ const VMStateDescription vmstate_cpu_hotplug = { + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + hwaddr io_base, + const char *res_root, +- const char *event_handler_method) ++ const char *event_handler_method, ++ AmlRegionSpace rs) + { + Aml *ifctx; + Aml *field; +@@ -342,13 +343,18 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0)); + + crs = aml_resource_template(); +- aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, +- ACPI_CPU_HOTPLUG_REG_LEN)); ++ if (rs == AML_SYSTEM_IO) { ++ aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, ++ ACPI_CPU_HOTPLUG_REG_LEN)); ++ } else { ++ aml_append(crs, aml_memory32_fixed(io_base, ++ ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE)); ++ } + aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs)); + + /* declare CPU hotplug MMIO region with related access fields */ + aml_append(cpu_ctrl_dev, +- aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base), ++ aml_operation_region("PRST", rs, aml_int(io_base), + ACPI_CPU_HOTPLUG_REG_LEN)); + + field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, +@@ -517,6 +523,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(dev, aml_name_decl("_UID", uid)); + } + ++ assert(adevc); ++ if (adevc->cpu_cppc) { ++ adevc->cpu_cppc(adev, i, arch_ids->len, dev); ++ } ++ + method = aml_method("_STA", 0, AML_SERIALIZED); + aml_append(method, aml_return(aml_call1(CPU_STS_METHOD, uid))); + aml_append(dev, method); +@@ -535,6 +546,11 @@ void build_cpus_aml(Aml 
*table, MachineState *machine, CPUHotplugFeatures opts, + apic->flags = cpu_to_le32(1); + break; + } ++ case ACPI_APIC_GENERIC_CPU_INTERFACE: { ++ AcpiMadtGenericCpuInterface *gicc = (void *)madt_buf->data; ++ gicc->flags = cpu_to_le32(1); ++ break; ++ } + default: + assert(0); + } +@@ -570,9 +586,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(sb_scope, cpus_dev); + aml_append(table, sb_scope); + +- method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); +- aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); +- aml_append(table, method); ++ if (event_handler_method) { ++ method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); ++ aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); ++ aml_append(table, method); ++ } + + g_free(cphp_res_path); + } +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 749218561a..c97731ecb3 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1869,7 +1869,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + .acpi_1_compatible = true, .has_legacy_cphp = true + }; + build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base, +- "\\_SB.PCI0", "\\_GPE._E02"); ++ "\\_SB.PCI0", "\\_GPE._E02", AML_SYSTEM_IO); + } + + if (pcms->memhp_io_base && nr_mem) { +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index 62f0278ba2..a30ec84a4f 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -55,7 +55,8 @@ typedef struct CPUHotplugFeatures { + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + hwaddr io_base, + const char *res_root, +- const char *event_handler_method); ++ const char *event_handler_method, ++ AmlRegionSpace rs); + + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list); + +-- +2.19.1 diff --git a/acpi-ged-Add-virt_madt_cpu_entry-to-madt_cpu-hook.patch b/acpi-ged-Add-virt_madt_cpu_entry-to-madt_cpu-hook.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f4e0a25c050e7948c1c7e1e2e0721c0b43d27fcc --- /dev/null +++ b/acpi-ged-Add-virt_madt_cpu_entry-to-madt_cpu-hook.patch @@ -0,0 +1,41 @@ +From 3cd6df0b9e7d7b544673ce9a63b405e236d8265b Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 10:05:54 +0800 +Subject: [PATCH] acpi/ged: Add virt_madt_cpu_entry to madt_cpu hook + +In build_cpus_aml, we will invoke this hook to build _MAT +aml method for cpus. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 1 + + include/hw/acpi/generic_event_device.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 9cee90cc70..b834ae3ff6 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -288,6 +288,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) + hc->plug = acpi_ged_device_plug_cb; + + adevc->send_event = acpi_ged_send_event; ++ adevc->madt_cpu = virt_madt_cpu_entry; + } + + static const TypeInfo acpi_ged_info = { +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index d157eac088..f99efad7a3 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -61,6 +61,7 @@ + + #include "hw/sysbus.h" + #include "hw/acpi/memory_hotplug.h" ++#include "hw/arm/virt.h" + + #define ACPI_POWER_BUTTON_DEVICE "PWRB" + +-- +2.19.1 diff --git a/acpi-ged-Extend-ACPI-GED-to-support-CPU-hotplug.patch b/acpi-ged-Extend-ACPI-GED-to-support-CPU-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..57247e6797da5a977685f737e9454427ab2c41df --- /dev/null +++ b/acpi-ged-Extend-ACPI-GED-to-support-CPU-hotplug.patch @@ -0,0 +1,204 @@ +From 05d22b55133db1a2526cfe305102e075e883b5e2 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 3 Apr 2020 15:41:01 +0800 +Subject: [PATCH] acpi/ged: Extend ACPI GED to support CPU 
hotplug + +This adds a new GED event called ACPI_GED_CPU_HOTPLUG_EVT. +The basic workflow is that: GED sends this event to guest, +then ACPI driver in guest will call _EVT method of GED aml, +then _EVT will call CSCN method in cpus aml to get status of +all cpus. + +The status of cpus is maintained by CPUHotplugState in GED and +is made accessible to guest through memory region. + +This also adds migration support to CPUHotplugState. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + docs/specs/acpi_hw_reduced_hotplug.rst | 3 ++- + hw/acpi/cpu.c | 1 - + hw/acpi/generic_event_device.c | 35 ++++++++++++++++++++++++++ + hw/arm/Kconfig | 1 + + include/hw/acpi/cpu.h | 2 ++ + include/hw/acpi/generic_event_device.h | 4 +++ + 6 files changed, 44 insertions(+), 2 deletions(-) + +diff --git a/docs/specs/acpi_hw_reduced_hotplug.rst b/docs/specs/acpi_hw_reduced_hotplug.rst +index 911a98255b..deb481555d 100644 +--- a/docs/specs/acpi_hw_reduced_hotplug.rst ++++ b/docs/specs/acpi_hw_reduced_hotplug.rst +@@ -63,7 +63,8 @@ GED IO interface (4 byte access) + bits: + 0: Memory hotplug event + 1: System power down event +- 2-31: Reserved ++ 2: CPU hotplug event ++ 3-31: Reserved + + **write_access:** + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 72ad1fcff2..cb6bb67f3c 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -6,7 +6,6 @@ + #include "trace.h" + #include "sysemu/numa.h" + +-#define ACPI_CPU_HOTPLUG_REG_LEN 12 + #define ACPI_CPU_SELECTOR_OFFSET_WR 0 + #define ACPI_CPU_FLAGS_OFFSET_RW 4 + #define ACPI_CPU_CMD_OFFSET_WR 5 +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 82139b4314..478a4ee87c 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -23,6 +23,7 @@ + static const uint32_t ged_supported_events[] = { + ACPI_GED_MEM_HOTPLUG_EVT, + ACPI_GED_PWR_DOWN_EVT, ++ ACPI_GED_CPU_HOTPLUG_EVT, + }; + + /* +@@ -110,6 +111,9 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler 
*hotplug_dev, + aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), + aml_int(0x80))); + break; ++ case ACPI_GED_CPU_HOTPLUG_EVT: ++ aml_append(if_ctx, aml_call0("\\_SB.CPUS.CSCN")); ++ break; + default: + /* + * Please make sure all the events in ged_supported_events[] +@@ -176,6 +180,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "virt: device plug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -192,6 +198,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + sel = ACPI_GED_MEM_HOTPLUG_EVT; + } else if (ev & ACPI_POWER_DOWN_STATUS) { + sel = ACPI_GED_PWR_DOWN_EVT; ++ } else if (ev & ACPI_CPU_HOTPLUG_STATUS) { ++ sel = ACPI_GED_CPU_HOTPLUG_EVT; + } else { + /* Unknown event. Return without generating interrupt. */ + warn_report("GED: Unsupported event %d. 
No irq injected", ev); +@@ -224,6 +232,16 @@ static const VMStateDescription vmstate_memhp_state = { + } + }; + ++static const VMStateDescription vmstate_cpuhp_state = { ++ .name = "acpi-ged/cpuhp", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_CPU_HOTPLUG(cpuhp_state, AcpiGedState), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + static const VMStateDescription vmstate_ged_state = { + .name = "acpi-ged-state", + .version_id = 1, +@@ -244,6 +262,7 @@ static const VMStateDescription vmstate_acpi_ged = { + }, + .subsections = (const VMStateDescription * []) { + &vmstate_memhp_state, ++ &vmstate_cpuhp_state, + NULL + } + }; +@@ -254,6 +273,7 @@ static void acpi_ged_initfn(Object *obj) + AcpiGedState *s = ACPI_GED(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + GEDState *ged_st = &s->ged_state; ++ MachineClass *mc; + + memory_region_init_io(&ged_st->io, obj, &ged_ops, ged_st, + TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); +@@ -273,6 +293,21 @@ static void acpi_ged_initfn(Object *obj) + sysbus_init_mmio(sbd, &s->container_memhp); + acpi_memory_hotplug_init(&s->container_memhp, OBJECT(dev), + &s->memhp_state, 0); ++ ++ mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!mc->possible_cpu_arch_ids) { ++ /* ++ * MachineClass should support possible_cpu_arch_ids in ++ * cpu_hotplug_hw_init below. 
++ */ ++ return; ++ } ++ ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(sbd, &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index ad7f7c089b..15e18b0a48 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -24,6 +24,7 @@ config ARM_VIRT + select DIMM + select ACPI_MEMORY_HOTPLUG + select ACPI_HW_REDUCED ++ select ACPI_CPU_HOTPLUG + + config CHEETAH + bool +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index a30ec84a4f..e726414459 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -17,6 +17,8 @@ + #include "hw/acpi/aml-build.h" + #include "hw/hotplug.h" + ++#define ACPI_CPU_HOTPLUG_REG_LEN 12 ++ + typedef struct AcpiCpuStatus { + struct CPUState *cpu; + uint64_t arch_id; +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index f99efad7a3..e702ff1e18 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -62,6 +62,7 @@ + #include "hw/sysbus.h" + #include "hw/acpi/memory_hotplug.h" + #include "hw/arm/virt.h" ++#include "hw/acpi/cpu.h" + + #define ACPI_POWER_BUTTON_DEVICE "PWRB" + +@@ -83,6 +84,7 @@ + */ + #define ACPI_GED_MEM_HOTPLUG_EVT 0x1 + #define ACPI_GED_PWR_DOWN_EVT 0x2 ++#define ACPI_GED_CPU_HOTPLUG_EVT 0x4 + + typedef struct GEDState { + MemoryRegion io; +@@ -93,6 +95,8 @@ typedef struct AcpiGedState { + SysBusDevice parent_obj; + MemHotplugState memhp_state; + MemoryRegion container_memhp; ++ CPUHotplugState cpuhp_state; ++ MemoryRegion container_cpuhp; + GEDState ged_state; + uint32_t ged_event_bitmap; + qemu_irq irq; +-- +2.19.1 diff --git a/acpi-madt-Add-pre-sizing-capability-to-MADT-GICC-str.patch b/acpi-madt-Add-pre-sizing-capability-to-MADT-GICC-str.patch new file mode 100644 
index 0000000000000000000000000000000000000000..30f210b33b66332cc0c16b5d2ddb706a177a2130 --- /dev/null +++ b/acpi-madt-Add-pre-sizing-capability-to-MADT-GICC-str.patch @@ -0,0 +1,95 @@ +From 0288d98f0ef4d17a73cf2bad1b928cd7c044e318 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 13:40:44 +0800 +Subject: [PATCH] acpi/madt: Add pre-sizing capability to MADT GICC struct + +The count of possible CPUs is exposed to guest through the count +of MADT GICC struct, so we should pre-size MADT GICC too. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 26 +++++++++++++++++++++----- + include/hw/acpi/acpi-defs.h | 1 + + 2 files changed, 22 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index dbe9acb148..efac788ba1 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -678,6 +678,13 @@ void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + const MemMapEntry *memmap = vms->memmap; + AcpiMadtGenericCpuInterface *gicc = acpi_data_push(entry, sizeof(*gicc)); + ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(uid)); ++ static bool pmu; ++ ++ if (uid == 0) { ++ pmu = arm_feature(&armcpu->env, ARM_FEATURE_PMU); ++ } ++ /* FEATURE_PMU should be all enabled or disabled for CPUs */ ++ assert(!armcpu || arm_feature(&armcpu->env, ARM_FEATURE_PMU) == pmu); + + gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE; + gicc->length = sizeof(*gicc); +@@ -687,11 +694,15 @@ void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + gicc->gicv_base_address = cpu_to_le64(memmap[VIRT_GIC_VCPU].base); + } + gicc->cpu_interface_number = cpu_to_le32(uid); +- gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity); ++ gicc->arm_mpidr = possible_cpus->cpus[uid].arch_id; + gicc->uid = cpu_to_le32(uid); +- gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); ++ if (armcpu) { ++ gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); ++ } else { ++ gicc->flags = cpu_to_le32(ACPI_MADT_GICC_DISABLED); ++ } + +-
if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { ++ if (pmu) { + gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); + } + if (vms->virt) { +@@ -704,12 +715,17 @@ static void + build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ MachineClass *mc = MACHINE_GET_CLASS(vms); ++ MachineState *ms = MACHINE(vms); ++ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); + int madt_start = table_data->len; + const MemMapEntry *memmap = vms->memmap; + const int *irqmap = vms->irqmap; + AcpiMultipleApicTable *madt; + AcpiMadtGenericDistributor *gicd; + AcpiMadtGenericMsiFrame *gic_msi; ++ /* The MADT GICC numbers */ ++ int num_cpu = vms->smp_cpus; + int i; + + madt = acpi_data_push(table_data, sizeof *madt); +@@ -720,8 +736,8 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base); + gicd->version = vms->gic_version; + +- for (i = 0; i < vms->smp_cpus; i++) { +- virt_madt_cpu_entry(NULL, i, NULL, table_data); ++ for (i = 0; i < num_cpu; i++) { ++ virt_madt_cpu_entry(NULL, i, possible_cpus, table_data); + } + + if (vms->gic_version == 3) { +diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h +index 39ae91d3b8..6bfa7f9152 100644 +--- a/include/hw/acpi/acpi-defs.h ++++ b/include/hw/acpi/acpi-defs.h +@@ -306,6 +306,7 @@ typedef struct AcpiMadtGenericCpuInterface AcpiMadtGenericCpuInterface; + + /* GICC CPU Interface Flags */ + #define ACPI_MADT_GICC_ENABLED 1 ++#define ACPI_MADT_GICC_DISABLED 0 + + struct AcpiMadtGenericDistributor { + ACPI_SUB_HEADER_DEF +-- +2.19.1 diff --git a/acpi-madt-Factor-out-the-building-of-MADT-GICC-struc.patch b/acpi-madt-Factor-out-the-building-of-MADT-GICC-struc.patch new file mode 100644 index 0000000000000000000000000000000000000000..6bda35c51a3952121a155081bf0a37ce3534da25 --- /dev/null +++ 
b/acpi-madt-Factor-out-the-building-of-MADT-GICC-struc.patch @@ -0,0 +1,108 @@ +From a3097eed8b642dc6fe891112340821e869b90cc2 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 13 Jan 2020 19:02:20 +0800 +Subject: [PATCH] acpi/madt: Factor out the building of MADT GICC struct + +To realize CPU hotplug, the cpus aml within ACPI DSDT should contain +_MAT method, which is equal to the GICC struct in ACPI MADT. Factor +out the GICC building code from ACPI MADT and reuse it in build_cpus_aml. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 51 +++++++++++++++++++++++----------------- + include/hw/arm/virt.h | 3 +++ + 2 files changed, 32 insertions(+), 22 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index f48733d9f2..4b6aace433 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -664,6 +664,34 @@ build_gtdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + table_data->len - gtdt_start, 2, NULL, NULL); + } + ++void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, ++ const CPUArchIdList *possible_cpus, GArray *entry) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine()); ++ const MemMapEntry *memmap = vms->memmap; ++ AcpiMadtGenericCpuInterface *gicc = acpi_data_push(entry, sizeof(*gicc)); ++ ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(uid)); ++ ++ gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE; ++ gicc->length = sizeof(*gicc); ++ if (vms->gic_version == 2) { ++ gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base); ++ gicc->gich_base_address = cpu_to_le64(memmap[VIRT_GIC_HYP].base); ++ gicc->gicv_base_address = cpu_to_le64(memmap[VIRT_GIC_VCPU].base); ++ } ++ gicc->cpu_interface_number = cpu_to_le32(uid); ++ gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity); ++ gicc->uid = cpu_to_le32(uid); ++ gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); ++ ++ if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { ++ gicc->performance_interrupt =
cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); ++ } ++ if (vms->virt) { ++ gicc->vgic_interrupt = cpu_to_le32(PPI(ARCH_GIC_MAINT_IRQ)); ++ } ++} ++ + /* MADT */ + static void + build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) +@@ -686,28 +714,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + gicd->version = vms->gic_version; + + for (i = 0; i < vms->smp_cpus; i++) { +- AcpiMadtGenericCpuInterface *gicc = acpi_data_push(table_data, +- sizeof(*gicc)); +- ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); +- +- gicc->type = ACPI_APIC_GENERIC_CPU_INTERFACE; +- gicc->length = sizeof(*gicc); +- if (vms->gic_version == 2) { +- gicc->base_address = cpu_to_le64(memmap[VIRT_GIC_CPU].base); +- gicc->gich_base_address = cpu_to_le64(memmap[VIRT_GIC_HYP].base); +- gicc->gicv_base_address = cpu_to_le64(memmap[VIRT_GIC_VCPU].base); +- } +- gicc->cpu_interface_number = cpu_to_le32(i); +- gicc->arm_mpidr = cpu_to_le64(armcpu->mp_affinity); +- gicc->uid = cpu_to_le32(i); +- gicc->flags = cpu_to_le32(ACPI_MADT_GICC_ENABLED); +- +- if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { +- gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); +- } +- if (vms->virt) { +- gicc->vgic_interrupt = cpu_to_le32(PPI(ARCH_GIC_MAINT_IRQ)); +- } ++ virt_madt_cpu_entry(NULL, i, NULL, table_data); + } + + if (vms->gic_version == 3) { +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 3dfefca93b..6b1f10b231 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -37,6 +37,7 @@ + #include "hw/block/flash.h" + #include "sysemu/kvm.h" + #include "hw/intc/arm_gicv3_common.h" ++#include "hw/acpi/acpi_dev_interface.h" + + #define NUM_GICV2M_SPIS 64 + #define NUM_VIRTIO_TRANSPORTS 32 +@@ -154,6 +155,8 @@ typedef struct { + OBJECT_CLASS_CHECK(VirtMachineClass, klass, TYPE_VIRT_MACHINE) + + void virt_acpi_setup(VirtMachineState *vms); ++void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, ++ const CPUArchIdList *cpu_list, GArray *entry); 
+ + /* Return the number of used redistributor regions */ + static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) +-- +2.19.1 diff --git a/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch b/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch new file mode 100644 index 0000000000000000000000000000000000000000..7926e7fa0db4ef16737a89782ea661273ef8c4d3 --- /dev/null +++ b/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch @@ -0,0 +1,116 @@ +From 929d29ec7bf9dd6ec3802bea2148a041ff30d59b Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 14 Apr 2020 21:17:09 +0800 +Subject: [PATCH] aio-wait: delegate polling of main AioContext if BQL not held + +Any thread that is not a iothread returns NULL for qemu_get_current_aio_context(). +As a result, it would also return true for +in_aio_context_home_thread(qemu_get_aio_context()), causing +AIO_WAIT_WHILE to invoke aio_poll() directly. This is incorrect +if the BQL is not held, because aio_poll() does not expect to +run concurrently from multiple threads, and it can actually +happen when savevm writes to the vmstate file from the +migration thread. + +Therefore, restrict in_aio_context_home_thread to return true +for the main AioContext only if the BQL is held. + +The function is moved to aio-wait.h because it is mostly used +there and to avoid a circular reference between main-loop.h +and block/aio.h. 
+ +Signed-off-by: Paolo Bonzini +Message-Id: <20200407140746.8041-5-pbonzini@redhat.com> +Signed-off-by: Stefan Hajnoczi +--- + include/block/aio-wait.h | 22 ++++++++++++++++++++++ + include/block/aio.h | 29 ++++++++++------------------- + 2 files changed, 32 insertions(+), 19 deletions(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index afeeb18f..716d2639 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -26,6 +26,7 @@ + #define QEMU_AIO_WAIT_H + + #include "block/aio.h" ++#include "qemu/main-loop.h" + + /** + * AioWait: +@@ -124,4 +125,25 @@ void aio_wait_kick(void); + */ + void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + ++/** ++ * in_aio_context_home_thread: ++ * @ctx: the aio context ++ * ++ * Return whether we are running in the thread that normally runs @ctx. Note ++ * that acquiring/releasing ctx does not affect the outcome, each AioContext ++ * still only has one home thread that is responsible for running it. ++ */ ++static inline bool in_aio_context_home_thread(AioContext *ctx) ++{ ++ if (ctx == qemu_get_current_aio_context()) { ++ return true; ++ } ++ ++ if (ctx == qemu_get_aio_context()) { ++ return qemu_mutex_iothread_locked(); ++ } else { ++ return false; ++ } ++} ++ + #endif /* QEMU_AIO_WAIT_H */ +diff --git a/include/block/aio.h b/include/block/aio.h +index 6b0d52f7..9d28e247 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -60,12 +60,16 @@ struct AioContext { + QLIST_HEAD(, AioHandler) aio_handlers; + + /* Used to avoid unnecessary event_notifier_set calls in aio_notify; +- * accessed with atomic primitives. If this field is 0, everything +- * (file descriptors, bottom halves, timers) will be re-evaluated +- * before the next blocking poll(), thus the event_notifier_set call +- * can be skipped. If it is non-zero, you may need to wake up a +- * concurrent aio_poll or the glib main event loop, making +- * event_notifier_set necessary. 
++ * only written from the AioContext home thread, or under the BQL in ++ * the case of the main AioContext. However, it is read from any ++ * thread so it is still accessed with atomic primitives. ++ * ++ * If this field is 0, everything (file descriptors, bottom halves, ++ * timers) will be re-evaluated before the next blocking poll() or ++ * io_uring wait; therefore, the event_notifier_set call can be ++ * skipped. If it is non-zero, you may need to wake up a concurrent ++ * aio_poll or the glib main event loop, making event_notifier_set ++ * necessary. + * + * Bit 0 is reserved for GSource usage of the AioContext, and is 1 + * between a call to aio_ctx_prepare and the next call to aio_ctx_check. +@@ -580,19 +584,6 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co); + */ + AioContext *qemu_get_current_aio_context(void); + +-/** +- * in_aio_context_home_thread: +- * @ctx: the aio context +- * +- * Return whether we are running in the thread that normally runs @ctx. Note +- * that acquiring/releasing ctx does not affect the outcome, each AioContext +- * still only has one home thread that is responsible for running it. 
+- */ +-static inline bool in_aio_context_home_thread(AioContext *ctx) +-{ +- return ctx == qemu_get_current_aio_context(); +-} +- + /** + * aio_context_setup: + * @ctx: the aio context +-- +2.23.0 diff --git a/apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch b/apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a96fc5ce1372f0e2f59ab9019cf8d72e0ee7bee --- /dev/null +++ b/apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch @@ -0,0 +1,50 @@ +From 3bdd21c4b7d80cacc6b5f1b26ab52ef3a0aceb06 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 16 Oct 2019 10:29:32 +0800 +Subject: [PATCH 7/8] apic: Use 32bit APIC ID for migration instance ID + +Migration is silently broken now with x2apic config like this: + + -smp 200,maxcpus=288,sockets=2,cores=72,threads=2 \ + -device intel-iommu,intremap=on,eim=on + +After migration, the guest kernel could hang at anything, due to +x2apic bit not migrated correctly in IA32_APIC_BASE on some vcpus, so +any operations related to x2apic could be broken then (e.g., RDMSR on +x2apic MSRs could fail because KVM would think that the vcpu hasn't +enabled x2apic at all). + +The issue is that the x2apic bit was never applied correctly for vcpus +whose ID > 255 when migrate completes, and that's because when we +migrate APIC we use the APICCommonState.id as instance ID of the +migration stream, while that's too short for x2apic. + +Let's use the newly introduced initial_apic_id for that. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Reviewed-by: Eduardo Habkost +Signed-off-by: Juan Quintela +--- + hw/intc/apic_common.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index 07adba0..2c0cb1e 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -313,7 +313,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info; + static DeviceState *vapic; +- uint32_t instance_id = s->id; ++ uint32_t instance_id = s->initial_apic_id; ++ ++ /* Normally initial APIC ID should be no more than hundreds */ ++ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); + + info = APIC_COMMON_GET_CLASS(s); + info->realize(dev, errp); +-- +1.8.3.1 + diff --git a/arm-cpu-Fixed-function-undefined-error-at-compile-ti.patch b/arm-cpu-Fixed-function-undefined-error-at-compile-ti.patch new file mode 100644 index 0000000000000000000000000000000000000000..68814a8d8135faf707bacbff017fe8bc84d71f54 --- /dev/null +++ b/arm-cpu-Fixed-function-undefined-error-at-compile-ti.patch @@ -0,0 +1,68 @@ +From 6d795b30ff09bc1f799daa454f776d682cc77197 Mon Sep 17 00:00:00 2001 +From: zhanghao1 +Date: Tue, 11 May 2021 20:17:16 +0800 +Subject: [PATCH] arm/cpu: Fixed function undefined error at compile time under + arm + + Add the compilation option CONFIG_KVM while using + "kvm_arm_cpu_feature_supported" and "kvm_arm_get_one_reg". + In arm, the default value of CONFIG_KVM is no. + + While the target is arm, the compilation fails because + the function "kvm_arm_cpu_feature_supported" is declared + or the function "kvm_arm_get_one_reg" is not defined.
+ +Signed-off-by: zhanghao1 +--- + target/arm/helper.c | 4 ++++ + target/arm/kvm_arm.h | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/target/arm/helper.c b/target/arm/helper.c +index bddd355fa0..9d2b2659f6 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -284,6 +284,7 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) + + newval = read_raw_cp_reg(&cpu->env, ri); + if (kvm_sync) { ++#ifdef CONFIG_KVM + if (is_id_reg(ri)) { + /* Only sync if we can sync to KVM successfully. */ + uint64_t oldval; +@@ -306,6 +307,7 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) + + kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &oldval); + } else { ++#endif + /* + * Only sync if the previous list->cpustate sync succeeded. + * Rather than tracking the success/failure state for every +@@ -324,7 +326,9 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) + } + + write_raw_cp_reg(&cpu->env, ri, newval); ++#ifdef CONFIG_KVM + } ++#endif + } + cpu->cpreg_values[i] = newval; + } +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 49e80878f4..a223967d4d 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -312,6 +312,10 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + + static inline void kvm_arm_add_vcpu_properties(Object *obj) {} + ++static inline bool kvm_arm_cpu_feature_supported(void) { ++ return false; ++} ++ + static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms) + { + return -ENOENT; +-- +2.27.0 + diff --git a/arm-cpu-assign-arm_get_arch_id-handler-to-get_arch_i.patch b/arm-cpu-assign-arm_get_arch_id-handler-to-get_arch_i.patch new file mode 100644 index 0000000000000000000000000000000000000000..84903c34d5a86cac12aadf7d35271295b49f143d --- /dev/null +++ b/arm-cpu-assign-arm_get_arch_id-handler-to-get_arch_i.patch @@ -0,0 +1,42 @@ +From d8e0b51447d8c64788cd7f9b0fa75c4ccb06f8eb Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 10:17:27 +0800 +Subject: 
[PATCH] arm/cpu: assign arm_get_arch_id handler to get_arch_id hook + +This hook will be called in get_cpu_status, which is called +during cpu hotplug. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + target/arm/cpu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 39bbe7e2d7..1ccb30e5eb 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2575,6 +2575,13 @@ static gchar *arm_gdb_arch_name(CPUState *cs) + return g_strdup("arm"); + } + ++static int64_t arm_cpu_get_arch_id(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ ++ return cpu->mp_affinity; ++} ++ + static void arm_cpu_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); +@@ -2596,6 +2603,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + cc->synchronize_from_tb = arm_cpu_synchronize_from_tb; + cc->gdb_read_register = arm_cpu_gdb_read_register; + cc->gdb_write_register = arm_cpu_gdb_write_register; ++ cc->get_arch_id = arm_cpu_get_arch_id; + #ifndef CONFIG_USER_ONLY + cc->do_interrupt = arm_cpu_do_interrupt; + cc->get_phys_page_attrs_debug = arm_cpu_get_phys_page_attrs_debug; +-- +2.19.1 diff --git a/arm-virt-Add-CPU-hotplug-framework.patch b/arm-virt-Add-CPU-hotplug-framework.patch new file mode 100644 index 0000000000000000000000000000000000000000..5de672afeab55fe5af5da3d2ce58aa2e9ae1435d --- /dev/null +++ b/arm-virt-Add-CPU-hotplug-framework.patch @@ -0,0 +1,66 @@ +From 6d287b3f1d961cc4adda1c6a452f41db84466f5a Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 3 Apr 2020 16:16:18 +0800 +Subject: [PATCH] arm/virt: Add CPU hotplug framework + +Establish the CPU hotplug framework for arm/virt, we will add +necessary code legs to this framework gradually to realize CPU +hotplug finally. 
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d09a5773df..0bd37af26c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2077,11 +2077,25 @@ out: + error_propagate(errp, local_err); + } + ++static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ /* Currently nothing to do */ ++} ++ ++static void virt_cpu_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ /* Currently nothing to do */ ++} ++ + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + virt_memory_pre_plug(hotplug_dev, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_pre_plug(hotplug_dev, dev, errp); + } + } + +@@ -2098,6 +2112,8 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, + } + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + virt_memory_plug(hotplug_dev, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_plug(hotplug_dev, dev, errp); + } + } + +@@ -2112,7 +2128,8 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, + DeviceState *dev) + { + if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE) || +- (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM))) { ++ object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || ++ object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + +-- +2.19.1 diff --git a/arm-virt-Add-CPU-topology-support.patch b/arm-virt-Add-CPU-topology-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7813c637449328c5fecf7575d27a06fa1fa0700 --- /dev/null +++ b/arm-virt-Add-CPU-topology-support.patch @@ -0,0 +1,219 @@ +From cde57fcae2ed16a10e1ef7f2da0ec368883988ba Mon 
Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 6 Apr 2020 10:54:35 +0800 +Subject: [PATCH] arm/virt: Add CPU topology support + +The CPU topology specified by user (through -smp options) is used in +ACPI PPTT. Now we will use this information to locate which CPU to +plug or unplug. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 68 +++++++++++++++++++++++++++++++++++++-- + include/hw/arm/topology.h | 61 +++++++++++++++++++++++++++++++++++ + target/arm/cpu.c | 3 ++ + target/arm/cpu.h | 3 ++ + 4 files changed, 133 insertions(+), 2 deletions(-) + create mode 100644 include/hw/arm/topology.h + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 0bd37af26c..64532b61b2 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -36,6 +36,7 @@ + #include "hw/sysbus.h" + #include "hw/arm/boot.h" + #include "hw/arm/primecell.h" ++#include "hw/arm/topology.h" + #include "hw/arm/virt.h" + #include "hw/block/flash.h" + #include "hw/vfio/vfio-calxeda-xgmac.h" +@@ -2020,6 +2021,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + int n; + unsigned int max_cpus = ms->smp.max_cpus; + VirtMachineState *vms = VIRT_MACHINE(ms); ++ ARMCPUTopoInfo topo; + + if (ms->possible_cpus) { + assert(ms->possible_cpus->len == max_cpus); +@@ -2031,10 +2033,17 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].type = ms->cpu_type; ++ ms->possible_cpus->cpus[n].vcpus_count = 1; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); ++ ++ topo_ids_from_idx(n, ms->smp.cores, ms->smp.threads, &topo); ++ ms->possible_cpus->cpus[n].props.has_socket_id = true; ++ ms->possible_cpus->cpus[n].props.socket_id = topo.pkg_id; ++ ms->possible_cpus->cpus[n].props.has_core_id = true; ++ ms->possible_cpus->cpus[n].props.core_id = topo.core_id; + ms->possible_cpus->cpus[n].props.has_thread_id = true; +- 
ms->possible_cpus->cpus[n].props.thread_id = n; ++ ms->possible_cpus->cpus[n].props.thread_id = topo.smt_id; + } + return ms->possible_cpus; + } +@@ -2080,7 +2089,62 @@ out: + static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- /* Currently nothing to do */ ++ CPUState *cs = CPU(dev); ++ ARMCPUTopoInfo topo; ++ ARMCPU *cpu = ARM_CPU(dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ int smp_cores = ms->smp.cores; ++ int smp_threads = ms->smp.threads; ++ ++ /* if cpu idx is not set, set it based on socket/core/thread properties */ ++ if (cs->cpu_index == UNASSIGNED_CPU_INDEX) { ++ int max_socket = ms->smp.max_cpus / smp_threads / smp_cores; ++ if (cpu->socket_id < 0 || cpu->socket_id >= max_socket) { ++ error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", ++ cpu->socket_id, max_socket - 1); ++ return; ++ } ++ if (cpu->core_id < 0 || cpu->core_id >= smp_cores) { ++ error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", ++ cpu->core_id, smp_cores - 1); ++ return; ++ } ++ if (cpu->thread_id < 0 || cpu->thread_id >= smp_threads) { ++ error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", ++ cpu->thread_id, smp_threads - 1); ++ return; ++ } ++ ++ topo.pkg_id = cpu->socket_id; ++ topo.core_id = cpu->core_id; ++ topo.smt_id = cpu->thread_id; ++ cs->cpu_index = idx_from_topo_ids(smp_cores, smp_threads, &topo); ++ } ++ ++ /* if 'address' properties socket-id/core-id/thread-id are not set, set them ++ * so that machine_query_hotpluggable_cpus would show correct values ++ */ ++ topo_ids_from_idx(cs->cpu_index, smp_cores, smp_threads, &topo); ++ if (cpu->socket_id != -1 && cpu->socket_id != topo.pkg_id) { ++ error_setg(errp, "property socket-id: %u doesn't match set idx:" ++ " 0x%x (socket-id: %u)", cpu->socket_id, cs->cpu_index, topo.pkg_id); ++ return; ++ } ++ cpu->socket_id = topo.pkg_id; ++ ++ if (cpu->core_id != -1 && cpu->core_id != topo.core_id) { ++ error_setg(errp, "property 
core-id: %u doesn't match set idx:" ++ " 0x%x (core-id: %u)", cpu->core_id, cs->cpu_index, topo.core_id); ++ return; ++ } ++ cpu->core_id = topo.core_id; ++ ++ if (cpu->thread_id != -1 && cpu->thread_id != topo.smt_id) { ++ error_setg(errp, "property thread-id: %u doesn't match set idx:" ++ " 0x%x (thread-id: %u)", cpu->thread_id, cs->cpu_index, topo.smt_id); ++ return; ++ } ++ cpu->thread_id = topo.smt_id; + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, +diff --git a/include/hw/arm/topology.h b/include/hw/arm/topology.h +new file mode 100644 +index 0000000000..a3e5f436c5 +--- /dev/null ++++ b/include/hw/arm/topology.h +@@ -0,0 +1,61 @@ ++/* ++ * ARM CPU topology data structures and functions ++ * ++ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, see . 
++ */ ++ ++#ifndef HW_ARM_TOPOLOGY_H ++#define HW_ARM_TOPOLOGY_H ++ ++typedef struct ARMCPUTopoInfo { ++ unsigned pkg_id; ++ unsigned core_id; ++ unsigned smt_id; ++} ARMCPUTopoInfo; ++ ++/* Calculate (contiguous) CPU index based on topology */ ++static inline unsigned idx_from_topo_ids(unsigned nr_cores, ++ unsigned nr_threads, ++ const ARMCPUTopoInfo *topo) ++{ ++ assert(nr_cores > 0); ++ assert(nr_threads > 0); ++ assert(topo != NULL); ++ ++ return topo->pkg_id * nr_cores * nr_threads + ++ topo->core_id * nr_threads + ++ topo->smt_id; ++} ++ ++/* Calculate thread/core/package topology ++ * based on (contiguous) CPU index ++ */ ++static inline void topo_ids_from_idx(unsigned cpu_index, ++ unsigned nr_cores, ++ unsigned nr_threads, ++ ARMCPUTopoInfo *topo) ++{ ++ assert(nr_cores > 0); ++ assert(nr_threads > 0); ++ assert(topo != NULL); ++ ++ topo->smt_id = cpu_index % nr_threads; ++ topo->core_id = cpu_index / nr_threads % nr_cores; ++ topo->pkg_id = cpu_index / nr_threads / nr_cores; ++} ++ ++#endif /* HW_ARM_TOPOLOGY_H */ ++ +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 1ccb30e5eb..91f1e36cd8 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2560,6 +2560,9 @@ static Property arm_cpu_properties[] = { + DEFINE_PROP_UINT64("mp-affinity", ARMCPU, + mp_affinity, ARM64_AFFINITY_INVALID), + DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), ++ DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, -1), ++ DEFINE_PROP_INT32("core-id", ARMCPU, core_id, -1), ++ DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, -1), + DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), + DEFINE_PROP_END_OF_LIST() + }; +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index e19531a77b..219c222b89 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -916,6 +916,9 @@ struct ARMCPU { + QLIST_HEAD(, ARMELChangeHook) el_change_hooks; + + int32_t node_id; /* NUMA node this CPU belongs to */ ++ int32_t socket_id; ++ int32_t core_id; ++ int32_t 
thread_id; + + /* Used to synchronize KVM and QEMU in-kernel device levels */ + uint8_t device_irq_level; +-- +2.19.1 diff --git a/arm-virt-Add-cpu_hotplug_enabled-field.patch b/arm-virt-Add-cpu_hotplug_enabled-field.patch new file mode 100644 index 0000000000000000000000000000000000000000..0b8bc47f6d31dba63e1148c22cc803204ce82e70 --- /dev/null +++ b/arm-virt-Add-cpu_hotplug_enabled-field.patch @@ -0,0 +1,61 @@ +From 31873c4c0454fb17654f57adece2bc396415f8bf Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 13:50:40 +0800 +Subject: [PATCH] arm/virt: Add cpu_hotplug_enabled field + +Some conditions must be satisfied to support CPU hotplug, including +ACPI, GED, 64bit CPU, GICv3. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 7 +++++++ + include/hw/arm/virt.h | 1 + + 2 files changed, 8 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index dda22194b5..304a4c2d31 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1645,6 +1645,7 @@ static void machvirt_init(MachineState *machine) + { + VirtMachineState *vms = VIRT_MACHINE(machine); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); ++ MachineState *ms = MACHINE(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; + MemoryRegion *sysmem = get_system_memory(); +@@ -1655,6 +1656,7 @@ static void machvirt_init(MachineState *machine) + bool has_ged = !vmc->no_ged; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; ++ ObjectClass *cpu_class; + + /* + * In accelerated mode, the memory map is computed earlier in kvm_type() +@@ -1760,6 +1762,11 @@ static void machvirt_init(MachineState *machine) + + create_fdt(vms); + ++ cpu_class = object_class_by_name(ms->cpu_type); ++ vms->cpu_hotplug_enabled = has_ged && firmware_loaded && ++ acpi_enabled && vms->gic_version == 3 && ++ !!object_class_dynamic_cast(cpu_class, TYPE_AARCH64_CPU); ++ + possible_cpus = 
mc->possible_cpu_arch_ids(machine); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index beef4c8002..b4c53d920e 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -126,6 +126,7 @@ typedef struct { + bool highmem_ecam; + bool its; + bool virt; ++ bool cpu_hotplug_enabled; + int32_t gic_version; + VirtIOMMUType iommu; + struct arm_boot_info bootinfo; +-- +2.19.1 diff --git a/arm-virt-Add-some-sanity-checks-in-cpu_pre_plug-hook.patch b/arm-virt-Add-some-sanity-checks-in-cpu_pre_plug-hook.patch new file mode 100644 index 0000000000000000000000000000000000000000..c81227d8a3ef4ff3ffc74f7b848b42e8cc79c762 --- /dev/null +++ b/arm-virt-Add-some-sanity-checks-in-cpu_pre_plug-hook.patch @@ -0,0 +1,66 @@ +From 7cfb37c50209208f853c6fbd0df6673a95e03ef9 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 14:16:40 +0800 +Subject: [PATCH] arm/virt: Add some sanity checks in cpu_pre_plug hook + +For that user will try to hotplug a CPU when preconditions +are not satisfied, check these CPU hotplug preconditions in +pre_plug hook. 
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 983084c459..c6a99e683a 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2086,10 +2086,30 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(hotplug_dev); + const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); ++ const CPUArchId *cpu_slot = NULL; + MemoryRegion *sysmem = get_system_memory(); + int smp_cores = ms->smp.cores; + int smp_threads = ms->smp.threads; + ++ /* Some hotplug capability checks */ ++ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if (dev->hotplugged && !vms->acpi_dev) { ++ error_setg(errp, "CPU hotplug is disabled: missing acpi device."); ++ return; ++ } ++ ++ if (dev->hotplugged && !vms->cpu_hotplug_enabled) { ++ error_setg(errp, "CPU hotplug is disabled: " ++ "should use AArch64 CPU and GICv3."); ++ return; ++ } ++ + /* if cpu idx is not set, set it based on socket/core/thread properties */ + if (cs->cpu_index == UNASSIGNED_CPU_INDEX) { + int max_socket = ms->smp.max_cpus / smp_threads / smp_cores; +@@ -2145,6 +2165,13 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, + object_property_set_int(cpuobj, possible_cpus->cpus[cs->cpu_index].arch_id, + "mp-affinity", NULL); + ++ cpu_slot = &possible_cpus->cpus[cs->cpu_index]; ++ if (cpu_slot->cpu) { ++ error_setg(errp, "CPU[%d] with mp_affinity %" PRIu64 " exists", ++ cs->cpu_index, cpu->mp_affinity); ++ return; ++ } ++ + numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), + &error_fatal); + +-- +2.19.1 diff --git a/arm-virt-Attach-ACPI-CPU-hotplug-support-to-virt.patch b/arm-virt-Attach-ACPI-CPU-hotplug-support-to-virt.patch new 
file mode 100644 index 0000000000000000000000000000000000000000..ade3ccfd9fe50a78c8ef33ded3463e52b5f6d6c3 --- /dev/null +++ b/arm-virt-Attach-ACPI-CPU-hotplug-support-to-virt.patch @@ -0,0 +1,100 @@ +From d38d1d4e859450535ddc6bf0c7a59f6217b1403c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 5 Apr 2020 16:03:15 +0800 +Subject: [PATCH] arm/virt: Attach ACPI CPU hotplug support to virt + +Attach cpus aml building and GED support for CPU hotplug to +arm/virt, but currently we keep it disabled by not adding CPU +hotplug event to GED. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 15 ++++++++++++++- + hw/arm/virt.c | 6 ++++++ + include/hw/arm/virt.h | 1 + + 3 files changed, 21 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 8b68a15d76..dbe9acb148 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -806,6 +806,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; + const int *irqmap = vms->irqmap; ++ bool cpu_aml_built = false; + + dsdt = init_aml_allocator(); + /* Reserve space for header */ +@@ -817,7 +818,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. 
+ */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms->smp_cpus, vms); + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); +@@ -845,6 +845,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + AML_SYSTEM_MEMORY, + memmap[VIRT_PCDIMM_ACPI].base); + } ++ ++ if (event & ACPI_GED_CPU_HOTPLUG_EVT) { ++ CPUHotplugFeatures opts = { ++ .acpi_1_compatible = false, .has_legacy_cphp = false ++ }; ++ build_cpus_aml(dsdt, ms, opts, memmap[VIRT_CPU_ACPI].base, ++ "\\_SB", NULL, AML_SYSTEM_MEMORY); ++ cpu_aml_built = true; ++ } ++ } ++ ++ if (!cpu_aml_built) { ++ acpi_dsdt_add_cpus(scope, vms->smp_cpus, vms); + } + + acpi_dsdt_add_power_button(scope); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8638aeedb7..d09a5773df 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -140,6 +140,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_SMMU] = { 0x09050000, 0x00020000 }, + [VIRT_PCDIMM_ACPI] = { 0x09070000, MEMORY_HOTPLUG_IO_LEN }, + [VIRT_ACPI_GED] = { 0x09080000, ACPI_GED_EVT_SEL_LEN }, ++ [VIRT_CPU_ACPI] = { 0x09090000, ACPI_CPU_HOTPLUG_REG_LEN }, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ +@@ -645,11 +646,16 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + event |= ACPI_GED_MEM_HOTPLUG_EVT; + } + ++ /* event |= ACPI_GED_CPU_HOTPLUG_EVT; ++ * Currently CPU hotplug is not enabled. 
++ */ ++ + dev = qdev_create(NULL, TYPE_ACPI_GED); + qdev_prop_set_uint32(dev, "ged-event", event); + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, vms->memmap[VIRT_CPU_ACPI].base); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); + + qdev_init_nofail(dev); +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index cbdea7ff32..6880ebe07c 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -81,6 +81,7 @@ enum { + VIRT_SECURE_MEM, + VIRT_PCDIMM_ACPI, + VIRT_ACPI_GED, ++ VIRT_CPU_ACPI, + VIRT_LOWMEMMAP_LAST, + }; + +-- +2.19.1 diff --git a/arm-virt-Pre-sizing-MADT-GICC-PPTT-GICv3-and-Pre-par.patch b/arm-virt-Pre-sizing-MADT-GICC-PPTT-GICv3-and-Pre-par.patch new file mode 100644 index 0000000000000000000000000000000000000000..c2d9a3cb0a48436433a30670e8517d7dafb9bca4 --- /dev/null +++ b/arm-virt-Pre-sizing-MADT-GICC-PPTT-GICv3-and-Pre-par.patch @@ -0,0 +1,124 @@ +From bf47ef282bfe8b0a98e1f87d8708051ffa7192a1 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 13:55:11 +0800 +Subject: [PATCH] arm/virt: Pre-sizing MADT-GICC PPTT GICv3 and Pre-park KVM + vCPU + +Establish all pre-sizing facilities based on cpu_hotplug_enabled +field. 
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 12 +++++++++++- + hw/arm/virt.c | 14 ++++++++++++-- + target/arm/kvm.c | 6 +++--- + 3 files changed, 26 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index efac788ba1..2cfac7b84f 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -736,6 +736,9 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + gicd->base_address = cpu_to_le64(memmap[VIRT_GIC_DIST].base); + gicd->version = vms->gic_version; + ++ if (vms->cpu_hotplug_enabled) { ++ num_cpu = ms->smp.max_cpus; ++ } + for (i = 0; i < num_cpu; i++) { + virt_madt_cpu_entry(NULL, i, possible_cpus, table_data); + } +@@ -902,9 +905,11 @@ static + void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) + { + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ MachineState *ms = MACHINE(vms); + GArray *table_offsets; + unsigned dsdt, xsdt; + GArray *tables_blob = tables->table_data; ++ int num_cpus; + + table_offsets = g_array_new(false, true /* clear */, + sizeof(uint32_t)); +@@ -923,7 +928,12 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) + + acpi_add_table(table_offsets, tables_blob); + +- build_pptt(tables_blob, tables->linker, vms->smp_cpus); ++ if (vms->cpu_hotplug_enabled) { ++ num_cpus = ms->smp.max_cpus; ++ } else { ++ num_cpus = ms->smp.cpus; ++ } ++ build_pptt(tables_blob, tables->linker, num_cpus); + + acpi_add_table(table_offsets, tables_blob); + build_madt(tables_blob, tables->linker, vms); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 304a4c2d31..983084c459 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -767,6 +767,9 @@ static void create_gic(VirtMachineState *vms) + unsigned int smp_cpus = ms->smp.cpus; + uint32_t nb_redist_regions = 0; + ++ if (vms->cpu_hotplug_enabled) { ++ num_cpus = ms->smp.max_cpus; ++ } + assert(num_cpus >= smp_cpus); + + gictype = (type == 3) 
? gicv3_class_name() : gic_class_name(); +@@ -1772,8 +1775,15 @@ static void machvirt_init(MachineState *machine) + Object *cpuobj; + CPUState *cs; + ++ if (kvm_enabled() && vms->cpu_hotplug_enabled) { ++ if (kvm_create_parked_vcpu(n) < 0) { ++ error_report("mach-virt: Create KVM parked vCPU failed"); ++ exit(1); ++ } ++ } ++ + if (n >= smp_cpus) { +- break; ++ continue; + } + + cpuobj = object_new(possible_cpus->cpus[n].type); +@@ -1857,7 +1867,7 @@ static void machvirt_init(MachineState *machine) + vms->bootinfo.kernel_filename = machine->kernel_filename; + vms->bootinfo.kernel_cmdline = machine->kernel_cmdline; + vms->bootinfo.initrd_filename = machine->initrd_filename; +- vms->bootinfo.nb_cpus = smp_cpus; ++ vms->bootinfo.nb_cpus = vms->cpu_hotplug_enabled ? max_cpus : smp_cpus; + vms->bootinfo.board_id = -1; + vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base; + vms->bootinfo.get_dtb = machvirt_dtb; +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 327b3bc338..4f131f687d 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -202,7 +202,7 @@ int kvm_arm_get_max_vm_ipa_size(MachineState *ms) + int kvm_arch_init(MachineState *ms, KVMState *s) + { + int ret = 0; +- unsigned int smp_cpus = ms->smp.cpus; ++ unsigned int max_cpus = ms->smp.max_cpus; + /* For ARM interrupt delivery is always asynchronous, + * whether we are using an in-kernel VGIC or not. 
+ */ +@@ -216,9 +216,9 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + + cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); + +- if (smp_cpus > 256 && ++ if (max_cpus > 256 && + !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) { +- error_report("Using more than 256 vcpus requires a host kernel " ++ error_report("Using more than max 256 vcpus requires a host kernel " + "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2"); + ret = -EINVAL; + } +-- +2.19.1 diff --git a/arm-virt-Start-up-CPU-hot-plug.patch b/arm-virt-Start-up-CPU-hot-plug.patch new file mode 100644 index 0000000000000000000000000000000000000000..5ba620a2215710682afa4ccdfd0d5cad53556680 --- /dev/null +++ b/arm-virt-Start-up-CPU-hot-plug.patch @@ -0,0 +1,159 @@ +From 11f9628ceff019259ff12ce469deafbf50eb3075 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 14:20:59 +0800 +Subject: [PATCH] arm/virt: Start up CPU hot-plug + +All the CPU hotplug facilities are ready. Assemble them +to start up CPU hot-plug capability for arm/virt. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 61 ++++++++++++++++++++++++++++++++++++++++--- + include/hw/arm/virt.h | 1 + + qom/cpu.c | 5 ++++ + target/arm/cpu.c | 2 ++ + 4 files changed, 65 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c6a99e683a..112a6ae7cb 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -48,6 +48,8 @@ + #include "sysemu/cpus.h" + #include "sysemu/sysemu.h" + #include "sysemu/kvm.h" ++#include "sysemu/cpus.h" ++#include "sysemu/hw_accel.h" + #include "hw/loader.h" + #include "exec/address-spaces.h" + #include "qemu/bitops.h" +@@ -649,9 +651,9 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + event |= ACPI_GED_MEM_HOTPLUG_EVT; + } + +- /* event |= ACPI_GED_CPU_HOTPLUG_EVT; +- * Currently CPU hotplug is not enabled. 
+- */ ++ if (vms->cpu_hotplug_enabled) { ++ event |= ACPI_GED_CPU_HOTPLUG_EVT; ++ } + + dev = qdev_create(NULL, TYPE_ACPI_GED); + qdev_prop_set_uint32(dev, "ged-event", event); +@@ -2214,12 +2216,62 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, + object_property_set_link(cpuobj, OBJECT(secure_sysmem), + "secure-memory", &error_abort); + } ++ ++ /* If we use KVM accel, we should pause all vcpus to ++ * allow hot access of vcpu registers. ++ */ ++ if (dev->hotplugged && kvm_enabled()) { ++ pause_all_vcpus(); ++ } + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- /* Currently nothing to do */ ++ CPUArchId *cpu_slot; ++ CPUState *cs = CPU(dev); ++ int ncpu = cs->cpu_index; ++ MachineState *ms = MACHINE(hotplug_dev); ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ GICv3State *gicv3; ++ ARMGICv3CommonClass *agcc; ++ Error *local_err = NULL; ++ ++ if (dev->hotplugged) { ++ /* Realize GIC related parts of CPU */ ++ assert(vms->gic_version == 3); ++ gicv3 = ARM_GICV3_COMMON(vms->gic); ++ agcc = ARM_GICV3_COMMON_GET_CLASS(gicv3); ++ agcc->cpu_hotplug_realize(gicv3, ncpu); ++ connect_gic_cpu_irqs(vms, ncpu); ++ ++ /* Register CPU reset and trigger it manually */ ++ cpu_synchronize_state(cs); ++ cpu_hotplug_register_reset(ncpu); ++ cpu_hotplug_reset_manually(ncpu); ++ cpu_synchronize_post_reset(cs); ++ ++ if (kvm_enabled()) { ++ resume_all_vcpus(); ++ } ++ } ++ ++ if (vms->acpi_dev) { ++ hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ vms->boot_cpus++; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); ++ } ++ ++ cpu_slot = &ms->possible_cpus->cpus[ncpu]; ++ cpu_slot->cpu = OBJECT(dev); ++out: ++ error_propagate(errp, local_err); + } + + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, +@@ -2324,6 +2376,7 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data) + mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15"); + mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; + mc->kvm_type = virt_kvm_type; ++ mc->has_hotpluggable_cpus = true; + assert(!mc->get_hotplug_handler); + mc->get_hotplug_handler = virt_machine_get_hotplug_handler; + hc->pre_plug = virt_machine_device_pre_plug_cb; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index b4c53d920e..a9429bed25 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -140,6 +140,7 @@ typedef struct { + uint32_t msi_phandle; + uint32_t iommu_phandle; + int psci_conduit; ++ uint32_t boot_cpus; + hwaddr highest_gpa; + DeviceState *gic; + DeviceState *acpi_dev; +diff --git a/qom/cpu.c b/qom/cpu.c +index f376f782d8..58cd9d5bbc 100644 +--- a/qom/cpu.c ++++ b/qom/cpu.c +@@ -342,7 +342,12 @@ static void cpu_common_realizefn(DeviceState *dev, Error **errp) + + if (dev->hotplugged) { + cpu_synchronize_post_init(cpu); ++ ++#ifdef __aarch64__ ++ if (!kvm_enabled()) ++#endif + cpu_resume(cpu); ++ + } + + /* NOTE: latest generic point where the cpu is fully realized */ +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 91f1e36cd8..811e5c6365 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2598,6 +2598,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + acc->parent_reset = cc->reset; + cc->reset = arm_cpu_reset; + ++ dc->user_creatable = true; ++ + cc->class_by_name = arm_cpu_class_by_name; + cc->has_work = arm_cpu_has_work; + cc->cpu_exec_interrupt = arm_cpu_exec_interrupt; +-- +2.19.1 diff --git a/arm-virt-Support-CPU-cold-plug.patch b/arm-virt-Support-CPU-cold-plug.patch new file mode 100644 index 0000000000000000000000000000000000000000..3f96fede24c1e4f18d0f05c1987b20cc5a883b93 --- /dev/null +++ b/arm-virt-Support-CPU-cold-plug.patch @@ -0,0 +1,92 @@ +From e3a1af72fca5bbcc840fba44d512bbe69ec55ca9 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 12 May 2020 15:05:06 +0800 +Subject: [PATCH] arm/virt: Support 
CPU cold plug + +This adds CPU cold plug support to arm virt machine board. +CPU cold plug means adding CPU by using "-device xx-arm-cpu" +when we bring up Qemu. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 36 +++++++++++++++++++----------------- + 1 file changed, 19 insertions(+), 17 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 112a6ae7cb..4c7279392f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2093,25 +2093,12 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, + int smp_cores = ms->smp.cores; + int smp_threads = ms->smp.threads; + +- /* Some hotplug capability checks */ +- + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", + ms->cpu_type); + return; + } + +- if (dev->hotplugged && !vms->acpi_dev) { +- error_setg(errp, "CPU hotplug is disabled: missing acpi device."); +- return; +- } +- +- if (dev->hotplugged && !vms->cpu_hotplug_enabled) { +- error_setg(errp, "CPU hotplug is disabled: " +- "should use AArch64 CPU and GICv3."); +- return; +- } +- + /* if cpu idx is not set, set it based on socket/core/thread properties */ + if (cs->cpu_index == UNASSIGNED_CPU_INDEX) { + int max_socket = ms->smp.max_cpus / smp_threads / smp_cores; +@@ -2137,6 +2124,20 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, + cs->cpu_index = idx_from_topo_ids(smp_cores, smp_threads, &topo); + } + ++ /* Some hotplug capability checks */ ++ if (cs->cpu_index >= ms->smp.cpus) { ++ if (!vms->acpi_dev) { ++ error_setg(errp, "CPU cold/hot plug is disabled: " ++ "missing acpi device."); ++ return; ++ } ++ if (!vms->cpu_hotplug_enabled) { ++ error_setg(errp, "CPU cold/hot plug is disabled: " ++ "should use AArch64 CPU and GICv3."); ++ return; ++ } ++ } ++ + /* if 'address' properties socket-id/core-id/thread-id are not set, set them + * so that machine_query_hotpluggable_cpus would show correct values + */ +@@ -2237,7 +2238,8 @@ static void virt_cpu_plug(HotplugHandler 
*hotplug_dev, + ARMGICv3CommonClass *agcc; + Error *local_err = NULL; + +- if (dev->hotplugged) { ++ /* For CPU that is cold/hot plugged */ ++ if (ncpu >= ms->smp.cpus) { + /* Realize GIC related parts of CPU */ + assert(vms->gic_version == 3); + gicv3 = ARM_GICV3_COMMON(vms->gic); +@@ -2250,10 +2252,10 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, + cpu_hotplug_register_reset(ncpu); + cpu_hotplug_reset_manually(ncpu); + cpu_synchronize_post_reset(cs); ++ } + +- if (kvm_enabled()) { +- resume_all_vcpus(); +- } ++ if (dev->hotplugged && kvm_enabled()) { ++ resume_all_vcpus(); + } + + if (vms->acpi_dev) { +-- +2.19.1 + diff --git a/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch b/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch new file mode 100644 index 0000000000000000000000000000000000000000..297ccf633fe69840b863f10109ee4271f16c11a1 --- /dev/null +++ b/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch @@ -0,0 +1,65 @@ +From 91fed8840b004ec7bc91969afa10f03e13f311c4 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Wed, 22 Apr 2020 19:52:58 +0800 +Subject: [PATCH] arm/virt/acpi: Extend cpufreq to support max_cpus + +We will support CPU hotplug soon, so extend memory region size to +allow hotplugged CPU access cpufreq space. 
+ +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpufreq.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c +index d02a25a6de..38dcab5683 100644 +--- a/hw/acpi/cpufreq.c ++++ b/hw/acpi/cpufreq.c +@@ -84,6 +84,7 @@ typedef struct CpuhzState { + uint32_t PerformanceLimited; + uint32_t LowestFreq; + uint32_t NominalFreq; ++ uint32_t num_cpu; + uint32_t reg_size; + } CpuhzState; + +@@ -95,10 +96,7 @@ static uint64_t cpufreq_read(void *opaque, hwaddr offset, + uint64_t r; + uint64_t n; + +- MachineState *ms = MACHINE(qdev_get_machine()); +- unsigned int smp_cpus = ms->smp.cpus; +- +- if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ if (offset >= s->num_cpu * CPPC_REG_PER_CPU_STRIDE) { + warn_report("cpufreq_read: offset 0x%lx out of range", offset); + return 0; + } +@@ -166,11 +164,10 @@ static uint64_t cpufreq_read(void *opaque, hwaddr offset, + static void cpufreq_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) + { ++ CpuhzState *s = CPUFREQ(opaque); + uint64_t n; +- MachineState *ms = MACHINE(qdev_get_machine()); +- unsigned int smp_cpus = ms->smp.cpus; + +- if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ if (offset >= s->num_cpu * CPPC_REG_PER_CPU_STRIDE) { + error_printf("cpufreq_write: offset 0x%lx out of range", offset); + return; + } +@@ -251,9 +248,9 @@ static void cpufreq_init(Object *obj) + CpuhzState *s = CPUFREQ(obj); + + MachineState *ms = MACHINE(qdev_get_machine()); +- unsigned int smp_cpus = ms->smp.cpus; ++ s->num_cpu = ms->smp.max_cpus; + +- s->reg_size = smp_cpus * CPPC_REG_PER_CPU_STRIDE; ++ s->reg_size = s->num_cpu * CPPC_REG_PER_CPU_STRIDE; + if (s->reg_size > MAX_SUPPORT_SPACE) { + error_report("Required space 0x%x excesses the max support 0x%x", + s->reg_size, MAX_SUPPORT_SPACE); +-- +2.19.1 diff --git a/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch b/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch new file mode 100644 
index 0000000000000000000000000000000000000000..f08f83de6505d7810cb7b6753bd00034e3921862 --- /dev/null +++ b/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch @@ -0,0 +1,121 @@ +From 2fdece10dac6161cb6c1f0f05247391aa3269eed Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Wed, 22 Apr 2020 15:58:27 +0800 +Subject: [PATCH] arm/virt/acpi: Factor out CPPC building from DSDT CPU aml + +When CPU hotplug is enabled, we will use build_cpus_aml instead of +acpi_dsdt_add_cpus, so factor out CPPC building and we can reuse it +in build_cpus_aml. + +Signed-off-by: Keqian Zhu +--- + hw/acpi/generic_event_device.c | 1 + + hw/arm/virt-acpi-build.c | 33 +++++++++++++++++----------- + include/hw/acpi/acpi_dev_interface.h | 2 ++ + include/hw/arm/virt.h | 2 ++ + 4 files changed, 25 insertions(+), 13 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index b834ae3ff6..82139b4314 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -289,6 +289,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) + + adevc->send_event = acpi_ged_send_event; + adevc->madt_cpu = virt_madt_cpu_entry; ++ adevc->cpu_cppc = virt_acpi_dsdt_cpu_cppc; + } + + static const TypeInfo acpi_ged_info = { +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 4b6aace433..8b68a15d76 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -111,8 +111,24 @@ static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset) + aml_append(dev, aml_name_decl("_CPC", cpc)); + } + +-static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus, +- const MemMapEntry *cppc_memmap) ++void virt_acpi_dsdt_cpu_cppc(AcpiDeviceIf *adev, int ncpu, int num_cpu, Aml *dev) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine()); ++ const MemMapEntry *cppc_memmap = &vms->memmap[VIRT_CPUFREQ]; ++ ++ /* ++ * Append _CPC and _PSD to support CPU frequence show ++ * Check CPPC available by 
DESIRED_PERF register ++ */ ++ if (cppc_regs_offset[DESIRED_PERF] != -1) { ++ acpi_dsdt_add_cppc(dev, ++ cppc_memmap->base + ncpu * CPPC_REG_PER_CPU_STRIDE, ++ cppc_regs_offset); ++ acpi_dsdt_add_psd(dev, num_cpu); ++ } ++} ++ ++static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus, VirtMachineState *vms) + { + uint16_t i; + +@@ -121,16 +137,7 @@ static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus, + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", aml_int(i))); + +- /* +- * Append _CPC and _PSD to support CPU frequence show +- * Check CPPC available by DESIRED_PERF register +- */ +- if (cppc_regs_offset[DESIRED_PERF] != -1) { +- acpi_dsdt_add_cppc(dev, +- cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE, +- cppc_regs_offset); +- acpi_dsdt_add_psd(dev, smp_cpus); +- } ++ virt_acpi_dsdt_cpu_cppc(NULL, i, smp_cpus, dev); + + aml_append(scope, dev); + } +@@ -810,7 +817,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. 
+ */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms->smp_cpus, &memmap[VIRT_CPUFREQ]); ++ acpi_dsdt_add_cpus(scope, vms->smp_cpus, vms); + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); +diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h +index adcb3a816c..2952914569 100644 +--- a/include/hw/acpi/acpi_dev_interface.h ++++ b/include/hw/acpi/acpi_dev_interface.h +@@ -3,6 +3,7 @@ + + #include "qom/object.h" + #include "hw/boards.h" ++#include "hw/acpi/aml-build.h" + + /* These values are part of guest ABI, and can not be changed */ + typedef enum { +@@ -55,5 +56,6 @@ typedef struct AcpiDeviceIfClass { + void (*send_event)(AcpiDeviceIf *adev, AcpiEventStatusBits ev); + void (*madt_cpu)(AcpiDeviceIf *adev, int uid, + const CPUArchIdList *apic_ids, GArray *entry); ++ void (*cpu_cppc)(AcpiDeviceIf *adev, int uid, int num_cpu, Aml *dev); + } AcpiDeviceIfClass; + #endif +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 6b1f10b231..cbdea7ff32 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -157,6 +157,8 @@ typedef struct { + void virt_acpi_setup(VirtMachineState *vms); + void virt_madt_cpu_entry(AcpiDeviceIf *adev, int uid, + const CPUArchIdList *cpu_list, GArray *entry); ++void virt_acpi_dsdt_cpu_cppc(AcpiDeviceIf *adev, int uid, ++ int num_cpu, Aml *dev); + + /* Return the number of used redistributor regions */ + static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) +-- +2.19.1 diff --git a/arm-virt-gic-Construct-irqs-connection-from-create_g.patch b/arm-virt-gic-Construct-irqs-connection-from-create_g.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e9506425f1de938d2ba70a3fb31a83561a4154e --- /dev/null +++ b/arm-virt-gic-Construct-irqs-connection-from-create_g.patch @@ -0,0 +1,123 @@ +From 92124743f4560c490780a229f53ea5881f706383 Mon Sep 17 00:00:00 
2001 +From: Keqian Zhu +Date: Sun, 5 Apr 2020 15:29:16 +0800 +Subject: [PATCH] arm/virt/gic: Construct irqs connection from create_gic + +Make it possible to connect the irqs for an individual CPU. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 90 ++++++++++++++++++++++++++++----------------------- + 1 file changed, 49 insertions(+), 41 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 83f4887e57..55d403bad6 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -706,6 +706,54 @@ static void create_v2m(VirtMachineState *vms) + fdt_add_v2m_gic_node(vms); + } + ++static void connect_gic_cpu_irqs(VirtMachineState *vms, int i) ++{ ++ DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); ++ SysBusDevice *gicbusdev = SYS_BUS_DEVICE(vms->gic); ++ int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; ++ int num_cpus = object_property_get_uint(OBJECT(vms->gic), "num-cpu", NULL); ++ int gic_type = vms->gic_version; ++ int irq; ++ /* Mapping from the output timer irq lines from the CPU to the ++ * GIC PPI inputs we use for the virt board. 
++ */ ++ const int timer_irq[] = { ++ [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, ++ [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, ++ [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, ++ [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, ++ }; ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_connect_gpio_out(cpudev, irq, ++ qdev_get_gpio_in(vms->gic, ++ ppibase + timer_irq[irq])); ++ } ++ ++ if (gic_type == 3) { ++ qemu_irq irq = qdev_get_gpio_in(vms->gic, ++ ppibase + ARCH_GIC_MAINT_IRQ); ++ qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0, irq); ++ } else if (vms->virt) { ++ qemu_irq irq = qdev_get_gpio_in(vms->gic, ++ ppibase + ARCH_GIC_MAINT_IRQ); ++ sysbus_connect_irq(gicbusdev, i + 4 * num_cpus, irq); ++ } ++ ++ qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, ++ qdev_get_gpio_in(vms->gic, ppibase ++ + VIRTUAL_PMU_IRQ)); ++ ++ sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); ++ sysbus_connect_irq(gicbusdev, i + num_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); ++ sysbus_connect_irq(gicbusdev, i + 2 * num_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); ++ sysbus_connect_irq(gicbusdev, i + 3 * num_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++} ++ + static void create_gic(VirtMachineState *vms) + { + MachineState *ms = MACHINE(vms); +@@ -775,47 +823,7 @@ static void create_gic(VirtMachineState *vms) + * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs. + */ + for (i = 0; i < smp_cpus; i++) { +- DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); +- int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS; +- int irq; +- /* Mapping from the output timer irq lines from the CPU to the +- * GIC PPI inputs we use for the virt board. 
+- */ +- const int timer_irq[] = { +- [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, +- [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, +- [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, +- [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, +- }; +- +- for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { +- qdev_connect_gpio_out(cpudev, irq, +- qdev_get_gpio_in(vms->gic, +- ppibase + timer_irq[irq])); +- } +- +- if (type == 3) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- ppibase + ARCH_GIC_MAINT_IRQ); +- qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", +- 0, irq); +- } else if (vms->virt) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- ppibase + ARCH_GIC_MAINT_IRQ); +- sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); +- } +- +- qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +- qdev_get_gpio_in(vms->gic, ppibase +- + VIRTUAL_PMU_IRQ)); +- +- sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +- sysbus_connect_irq(gicbusdev, i + smp_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); +- sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); +- sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++ connect_gic_cpu_irqs(vms, i); + } + + fdt_add_gic_node(vms); +-- +2.19.1 diff --git a/async-use-explicit-memory-barriers.patch b/async-use-explicit-memory-barriers.patch new file mode 100644 index 0000000000000000000000000000000000000000..7fb68c949ad4e95cc0f908c44042096b69cf9295 --- /dev/null +++ b/async-use-explicit-memory-barriers.patch @@ -0,0 +1,171 @@ +From 787af8ed2bc86dc8688727d62a251965d9c42e00 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Fri, 10 Apr 2020 16:19:50 +0000 +Subject: [PATCH 2/2] async: use explicit memory barriers + +When using C11 atomics, non-seqcst reads and writes do not participate +in the total order of seqcst operations. 
In util/async.c and util/aio-posix.c, +in particular, the pattern that we use + + write ctx->notify_me write bh->scheduled + read bh->scheduled read ctx->notify_me + if !bh->scheduled, sleep if ctx->notify_me, notify + +needs to use seqcst operations for both the write and the read. In +general this is something that we do not want, because there can be +many sources that are polled in addition to bottom halves. The +alternative is to place a seqcst memory barrier between the write +and the read. This also comes with a disadvantage, in that the +memory barrier is implicit on strongly-ordered architectures and +it wastes a few dozen clock cycles. + +Fortunately, ctx->notify_me is never written concurrently by two +threads, so we can assert that and relax the writes to ctx->notify_me. +The resulting solution works and performs well on both aarch64 and x86. + +Note that the atomic_set/atomic_read combination is not an atomic +read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED; +on x86, ATOMIC_RELAXED compiles to a locked operation. + +upstream_url: https://patchwork.kernel.org/patch/11482103/ + +Analyzed-by: Ying Fang +Signed-off-by: Paolo Bonzini +Tested-by: Ying Fang +Message-Id: <20200407140746.8041-6-pbonzini@redhat.com> +Signed-off-by: Stefan Hajnoczi +--- + util/aio-posix.c | 16 ++++++++++++++-- + util/aio-win32.c | 17 ++++++++++++++--- + util/async.c | 16 ++++++++++++---- + 3 files changed, 40 insertions(+), 9 deletions(-) + +diff --git a/util/aio-posix.c b/util/aio-posix.c +index 6fbfa792..ca58b9a4 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -613,6 +613,11 @@ bool aio_poll(AioContext *ctx, bool blocking) + int64_t timeout; + int64_t start = 0; + ++ /* ++ * There cannot be two concurrent aio_poll calls for the same AioContext (or ++ * an aio_poll concurrent with a GSource prepare/check/dispatch callback). ++ * We rely on this below to avoid slow locked accesses to ctx->notify_me. 
++ */ + assert(in_aio_context_home_thread(ctx)); + + /* aio_notify can avoid the expensive event_notifier_set if +@@ -623,7 +628,13 @@ bool aio_poll(AioContext *ctx, bool blocking) + * so disable the optimization now. + */ + if (blocking) { +- atomic_add(&ctx->notify_me, 2); ++ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2); ++ /* ++ * Write ctx->notify_me before computing the timeout ++ * (reading bottom half flags, etc.). Pairs with ++ * smp_mb in aio_notify(). ++ */ ++ smp_mb(); + } + + qemu_lockcnt_inc(&ctx->list_lock); +@@ -668,7 +679,8 @@ bool aio_poll(AioContext *ctx, bool blocking) + } + + if (blocking) { +- atomic_sub(&ctx->notify_me, 2); ++ /* Finish the poll before clearing the flag. */ ++ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2); + aio_notify_accept(ctx); + } + +diff --git a/util/aio-win32.c b/util/aio-win32.c +index a23b9c36..729d533f 100644 +--- a/util/aio-win32.c ++++ b/util/aio-win32.c +@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking) + int count; + int timeout; + ++ /* ++ * There cannot be two concurrent aio_poll calls for the same AioContext (or ++ * an aio_poll concurrent with a GSource prepare/check/dispatch callback). ++ * We rely on this below to avoid slow locked accesses to ctx->notify_me. ++ */ ++ assert(in_aio_context_home_thread(ctx)); + progress = false; + + /* aio_notify can avoid the expensive event_notifier_set if +@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking) + * so disable the optimization now. + */ + if (blocking) { +- atomic_add(&ctx->notify_me, 2); ++ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2); ++ /* ++ * Write ctx->notify_me before computing the timeout ++ * (reading bottom half flags, etc.). Pairs with ++ * smp_mb in aio_notify(). 
++ */ ++ smp_mb(); + } + + qemu_lockcnt_inc(&ctx->list_lock); +@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking) + ret = WaitForMultipleObjects(count, events, FALSE, timeout); + if (blocking) { + assert(first); +- assert(in_aio_context_home_thread(ctx)); +- atomic_sub(&ctx->notify_me, 2); ++ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2); + aio_notify_accept(ctx); + } + +diff --git a/util/async.c b/util/async.c +index afc17fb3..12b33204 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -221,7 +221,14 @@ aio_ctx_prepare(GSource *source, gint *timeout) + { + AioContext *ctx = (AioContext *) source; + +- atomic_or(&ctx->notify_me, 1); ++ atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1); ++ ++ /* ++ * Write ctx->notify_me before computing the timeout ++ * (reading bottom half flags, etc.). Pairs with ++ * smp_mb in aio_notify(). ++ */ ++ smp_mb(); + + /* We assume there is no timeout already supplied */ + *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)); +@@ -239,7 +246,8 @@ aio_ctx_check(GSource *source) + AioContext *ctx = (AioContext *) source; + QEMUBH *bh; + +- atomic_and(&ctx->notify_me, ~1); ++ /* Finish computing the timeout before clearing the flag. */ ++ atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1); + aio_notify_accept(ctx); + + for (bh = ctx->first_bh; bh; bh = bh->next) { +@@ -344,10 +352,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx) + void aio_notify(AioContext *ctx) + { + /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs +- * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll. ++ * with smp_mb in aio_ctx_prepare or aio_poll. 
+ */ + smp_mb(); +- if (ctx->notify_me) { ++ if (atomic_read(&ctx->notify_me)) { + event_notifier_set(&ctx->notifier); + atomic_mb_set(&ctx->notified, true); + } +-- +2.25.2 + diff --git a/ati-check-x-y-display-parameter-values.patch b/ati-check-x-y-display-parameter-values.patch new file mode 100644 index 0000000000000000000000000000000000000000..22a38b28067a612f181c4fc0a2cf1b9f6d049493 --- /dev/null +++ b/ati-check-x-y-display-parameter-values.patch @@ -0,0 +1,53 @@ +From 9557ba506470517668ffecb4d5ef4804eca4fd88 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Wed, 18 Nov 2020 10:22:32 +0800 +Subject: [PATCH] ati: check x y display parameter values + +fix CVE-2020-24352 + +The source and destination x,y display parameters in ati_2d_blt() +may run off the vga limits if either of s->regs.[src|dst]_[xy] is +zero. Check the parameter values to avoid potential crash. + +Reported-by: Gaoning Pan +Signed-off-by: Prasad J Pandit +Message-id: 20201021103818.1704030-1-ppandit@redhat.com +Signed-off-by: Gerd Hoffmann + +cherry-pick from commit ca1f9cbfdce4d63b10d57de80fef89a89d92a540 +Signed-off-by: Jiajie Li +--- + hw/display/ati_2d.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c +index 23a8ae0cd8..4dc10ea795 100644 +--- a/hw/display/ati_2d.c ++++ b/hw/display/ati_2d.c +@@ -75,8 +75,9 @@ void ati_2d_blt(ATIVGAState *s) + dst_stride *= bpp; + } + uint8_t *end = s->vga.vram_ptr + s->vga.vram_size; +- if (dst_bits >= end || dst_bits + dst_x + (dst_y + s->regs.dst_height) * +- dst_stride >= end) { ++ if (dst_x > 0x3fff || dst_y > 0x3fff || dst_bits >= end ++ || dst_bits + dst_x ++ + (dst_y + s->regs.dst_height) * dst_stride >= end) { + qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n"); + return; + } +@@ -107,8 +108,9 @@ void ati_2d_blt(ATIVGAState *s) + src_bits += s->regs.crtc_offset & 0x07ffffff; + src_stride *= bpp; + } +- if (src_bits >= end || src_bits + src_x + +- (src_y + 
s->regs.dst_height) * src_stride >= end) { ++ if (src_x > 0x3fff || src_y > 0x3fff || src_bits >= end ++ || src_bits + src_x ++ + (src_y + s->regs.dst_height) * src_stride >= end) { + qemu_log_mask(LOG_UNIMP, "blt outside vram not implemented\n"); + return; + } +-- +2.27.0 + diff --git a/ati-use-vga_read_byte-in-ati_cursor_define.patch b/ati-use-vga_read_byte-in-ati_cursor_define.patch new file mode 100644 index 0000000000000000000000000000000000000000..a4b7e806600c341d865ca6acaf75ed4ce6b9c9b8 --- /dev/null +++ b/ati-use-vga_read_byte-in-ati_cursor_define.patch @@ -0,0 +1,198 @@ +From 1ebe0e71d04bfdc76777a3a672e873f006d207e2 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Fri, 5 Feb 2021 10:38:24 +0800 +Subject: [PATCH] ati: use vga_read_byte in ati_cursor_define +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +fix CVE-2019-20808 + +This makes sure reads are confined to vga video memory. + +v3: use uint32_t, fix cut+paste bug. +v2: fix ati_cursor_draw_line too. 
+ +Reported-by: xu hang +Signed-off-by: Gerd Hoffmann +Reviewed-by: BALATON Zoltan +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20190917111441.27405-3-kraxel@redhat.com + +cherry-pick from aab0e2a661b2b6bf7915c0aefe807fb60d6d9d13 +Signed-off-by: Jiajie Li +--- + hw/display/ati.c | 21 ++++++++--------- + hw/display/vga-access.h | 49 ++++++++++++++++++++++++++++++++++++++++ + hw/display/vga-helpers.h | 27 +--------------------- + 3 files changed, 60 insertions(+), 37 deletions(-) + create mode 100644 hw/display/vga-access.h + +diff --git a/hw/display/ati.c b/hw/display/ati.c +index 5943040416..b17569874e 100644 +--- a/hw/display/ati.c ++++ b/hw/display/ati.c +@@ -19,6 +19,7 @@ + #include "qemu/osdep.h" + #include "ati_int.h" + #include "ati_regs.h" ++#include "vga-access.h" + #include "vga_regs.h" + #include "qemu/log.h" + #include "qemu/module.h" +@@ -125,20 +126,19 @@ static void ati_vga_switch_mode(ATIVGAState *s) + static void ati_cursor_define(ATIVGAState *s) + { + uint8_t data[1024]; +- uint8_t *src; ++ uint32_t srcoff; + int i, j, idx = 0; + + if ((s->regs.cur_offset & BIT(31)) || s->cursor_guest_mode) { + return; /* Do not update cursor if locked or rendered by guest */ + } + /* FIXME handle cur_hv_offs correctly */ +- src = s->vga.vram_ptr + (s->regs.crtc_offset & 0x07ffffff) + +- s->regs.cur_offset - (s->regs.cur_hv_offs >> 16) - +- (s->regs.cur_hv_offs & 0xffff) * 16; ++ srcoff = s->regs.cur_offset - ++ (s->regs.cur_hv_offs >> 16) - (s->regs.cur_hv_offs & 0xffff) * 16; + for (i = 0; i < 64; i++) { + for (j = 0; j < 8; j++, idx++) { +- data[idx] = src[i * 16 + j]; +- data[512 + idx] = src[i * 16 + j + 8]; ++ data[idx] = vga_read_byte(&s->vga, srcoff + i * 16 + j); ++ data[512 + idx] = vga_read_byte(&s->vga, srcoff + i * 16 + j + 8); + } + } + if (!s->cursor) { +@@ -180,7 +180,7 @@ static void ati_cursor_invalidate(VGACommonState *vga) + static void ati_cursor_draw_line(VGACommonState *vga, uint8_t *d, int scr_y) + { + ATIVGAState *s = container_of(vga, 
ATIVGAState, vga); +- uint8_t *src; ++ uint32_t srcoff; + uint32_t *dp = (uint32_t *)d; + int i, j, h; + +@@ -190,14 +190,13 @@ static void ati_cursor_draw_line(VGACommonState *vga, uint8_t *d, int scr_y) + return; + } + /* FIXME handle cur_hv_offs correctly */ +- src = s->vga.vram_ptr + (s->regs.crtc_offset & 0x07ffffff) + +- s->cursor_offset + (scr_y - vga->hw_cursor_y) * 16; ++ srcoff = s->cursor_offset + (scr_y - vga->hw_cursor_y) * 16; + dp = &dp[vga->hw_cursor_x]; + h = ((s->regs.crtc_h_total_disp >> 16) + 1) * 8; + for (i = 0; i < 8; i++) { + uint32_t color; +- uint8_t abits = src[i]; +- uint8_t xbits = src[i + 8]; ++ uint8_t abits = vga_read_byte(vga, srcoff + i); ++ uint8_t xbits = vga_read_byte(vga, srcoff + i + 8); + for (j = 0; j < 8; j++, abits <<= 1, xbits <<= 1) { + if (abits & BIT(7)) { + if (xbits & BIT(7)) { +diff --git a/hw/display/vga-access.h b/hw/display/vga-access.h +new file mode 100644 +index 0000000000..c0fbd9958b +--- /dev/null ++++ b/hw/display/vga-access.h +@@ -0,0 +1,49 @@ ++/* ++ * QEMU VGA Emulator templates ++ * ++ * Copyright (c) 2003 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++static inline uint8_t vga_read_byte(VGACommonState *vga, uint32_t addr) ++{ ++ return vga->vram_ptr[addr & vga->vbe_size_mask]; ++} ++ ++static inline uint16_t vga_read_word_le(VGACommonState *vga, uint32_t addr) ++{ ++ uint32_t offset = addr & vga->vbe_size_mask & ~1; ++ uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); ++ return lduw_le_p(ptr); ++} ++ ++static inline uint16_t vga_read_word_be(VGACommonState *vga, uint32_t addr) ++{ ++ uint32_t offset = addr & vga->vbe_size_mask & ~1; ++ uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); ++ return lduw_be_p(ptr); ++} ++ ++static inline uint32_t vga_read_dword_le(VGACommonState *vga, uint32_t addr) ++{ ++ uint32_t offset = addr & vga->vbe_size_mask & ~3; ++ uint32_t *ptr = (uint32_t *)(vga->vram_ptr + offset); ++ return ldl_le_p(ptr); ++} +diff --git a/hw/display/vga-helpers.h b/hw/display/vga-helpers.h +index 5a752b3f9e..5b6c02faa6 100644 +--- a/hw/display/vga-helpers.h ++++ b/hw/display/vga-helpers.h +@@ -21,6 +21,7 @@ + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ ++#include "vga-access.h" + + static inline void vga_draw_glyph_line(uint8_t *d, uint32_t font_data, + uint32_t xorcol, uint32_t bgcol) +@@ -95,32 +96,6 @@ static void vga_draw_glyph9(uint8_t *d, int linesize, + } while (--h); + } + +-static inline uint8_t vga_read_byte(VGACommonState *vga, uint32_t addr) +-{ +- return vga->vram_ptr[addr & vga->vbe_size_mask]; +-} +- +-static inline uint16_t vga_read_word_le(VGACommonState *vga, uint32_t addr) +-{ +- uint32_t offset = addr & vga->vbe_size_mask & ~1; +- uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); +- return lduw_le_p(ptr); +-} +- +-static inline uint16_t vga_read_word_be(VGACommonState *vga, uint32_t addr) +-{ +- uint32_t offset = addr & vga->vbe_size_mask & ~1; +- uint16_t *ptr = (uint16_t *)(vga->vram_ptr + offset); +- return lduw_be_p(ptr); +-} +- +-static inline uint32_t vga_read_dword_le(VGACommonState *vga, uint32_t addr) +-{ +- uint32_t offset = addr & vga->vbe_size_mask & ~3; +- uint32_t *ptr = (uint32_t *)(vga->vram_ptr + offset); +- return ldl_le_p(ptr); +-} +- + /* + * 4 color mode + */ +-- +2.27.0 + diff --git a/ati-vga-Fix-checks-in-ati_2d_blt-to-avoid-crash.patch b/ati-vga-Fix-checks-in-ati_2d_blt-to-avoid-crash.patch new file mode 100644 index 0000000000000000000000000000000000000000..ef1d8b646c607a5afb6278ab229efc4a5a15965f --- /dev/null +++ b/ati-vga-Fix-checks-in-ati_2d_blt-to-avoid-crash.patch @@ -0,0 +1,91 @@ +From ac2071c3791b67fc7af78b8ceb320c01ca1b5df7 Mon Sep 17 00:00:00 2001 +From: BALATON Zoltan +Date: Mon, 6 Apr 2020 22:34:26 +0200 +Subject: [PATCH] ati-vga: Fix checks in ati_2d_blt() to avoid crash + +In some corner cases (that never happen during normal operation but a +malicious guest could program wrong values) pixman functions were +called with parameters that result in a crash. Fix this and add more +checks to disallow such cases. 
+ +Reported-by: Ziming Zhang +Signed-off-by: BALATON Zoltan +Message-id: 20200406204029.19559747D5D@zero.eik.bme.hu +Signed-off-by: Gerd Hoffmann + +diff --git a/hw/display/ati_2d.c b/hw/display/ati_2d.c +index 42e82311eb..23a8ae0cd8 100644 +--- a/hw/display/ati_2d.c ++++ b/hw/display/ati_2d.c +@@ -53,12 +53,20 @@ void ati_2d_blt(ATIVGAState *s) + s->vga.vbe_start_addr, surface_data(ds), surface_stride(ds), + surface_bits_per_pixel(ds), + (s->regs.dp_mix & GMC_ROP3_MASK) >> 16); +- int dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? +- s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width); +- int dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? +- s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height); ++ unsigned dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? ++ s->regs.dst_x : s->regs.dst_x + 1 - s->regs.dst_width); ++ unsigned dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? ++ s->regs.dst_y : s->regs.dst_y + 1 - s->regs.dst_height); + int bpp = ati_bpp_from_datatype(s); ++ if (!bpp) { ++ qemu_log_mask(LOG_GUEST_ERROR, "Invalid bpp\n"); ++ return; ++ } + int dst_stride = DEFAULT_CNTL ? s->regs.dst_pitch : s->regs.default_pitch; ++ if (!dst_stride) { ++ qemu_log_mask(LOG_GUEST_ERROR, "Zero dest pitch\n"); ++ return; ++ } + uint8_t *dst_bits = s->vga.vram_ptr + (DEFAULT_CNTL ? + s->regs.dst_offset : s->regs.default_offset); + +@@ -82,12 +90,16 @@ void ati_2d_blt(ATIVGAState *s) + switch (s->regs.dp_mix & GMC_ROP3_MASK) { + case ROP3_SRCCOPY: + { +- int src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? +- s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width); +- int src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? +- s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height); ++ unsigned src_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? ++ s->regs.src_x : s->regs.src_x + 1 - s->regs.dst_width); ++ unsigned src_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? ++ s->regs.src_y : s->regs.src_y + 1 - s->regs.dst_height); + int src_stride = DEFAULT_CNTL ? 
+ s->regs.src_pitch : s->regs.default_pitch; ++ if (!src_stride) { ++ qemu_log_mask(LOG_GUEST_ERROR, "Zero source pitch\n"); ++ return; ++ } + uint8_t *src_bits = s->vga.vram_ptr + (DEFAULT_CNTL ? + s->regs.src_offset : s->regs.default_offset); + +@@ -137,8 +149,10 @@ void ati_2d_blt(ATIVGAState *s) + dst_y * surface_stride(ds), + s->regs.dst_height * surface_stride(ds)); + } +- s->regs.dst_x += s->regs.dst_width; +- s->regs.dst_y += s->regs.dst_height; ++ s->regs.dst_x = (s->regs.dp_cntl & DST_X_LEFT_TO_RIGHT ? ++ dst_x + s->regs.dst_width : dst_x); ++ s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? ++ dst_y + s->regs.dst_height : dst_y); + break; + } + case ROP3_PATCOPY: +@@ -179,7 +193,8 @@ void ati_2d_blt(ATIVGAState *s) + dst_y * surface_stride(ds), + s->regs.dst_height * surface_stride(ds)); + } +- s->regs.dst_y += s->regs.dst_height; ++ s->regs.dst_y = (s->regs.dp_cntl & DST_Y_TOP_TO_BOTTOM ? ++ dst_y + s->regs.dst_height : dst_y); + break; + } + default: +-- +2.23.0 + diff --git a/ati-vga-check-mm_index-before-recursive-call-CVE-202.patch b/ati-vga-check-mm_index-before-recursive-call-CVE-202.patch new file mode 100644 index 0000000000000000000000000000000000000000..b80c9dc973015dd83e3d9c0c000dd0b15b303608 --- /dev/null +++ b/ati-vga-check-mm_index-before-recursive-call-CVE-202.patch @@ -0,0 +1,59 @@ +From 89554d2f71d4c79c5d8e804d90d74f3985d7ded5 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 4 Jun 2020 14:38:30 +0530 +Subject: [PATCH 3/9] ati-vga: check mm_index before recursive call + (CVE-2020-13800) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +While accessing VGA registers via ati_mm_read/write routines, +a guest may set 's->regs.mm_index' such that it leads to infinite +recursion. Check mm_index value to avoid such recursion. Log an +error message for wrong values. 
+ +Reported-by: Ren Ding +Reported-by: Hanqing Zhao +Reported-by: Yi Ren +Message-id: 20200604090830.33885-1-ppandit@redhat.com +Suggested-by: BALATON Zoltan +Suggested-by: Philippe Mathieu-Daudé +Signed-off-by: Prasad J Pandit +Signed-off-by: Gerd Hoffmann +--- + hw/display/ati.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/display/ati.c b/hw/display/ati.c +index a747c4cc98..5943040416 100644 +--- a/hw/display/ati.c ++++ b/hw/display/ati.c +@@ -261,8 +261,11 @@ static uint64_t ati_mm_read(void *opaque, hwaddr addr, unsigned int size) + if (idx <= s->vga.vram_size - size) { + val = ldn_le_p(s->vga.vram_ptr + idx, size); + } +- } else { ++ } else if (s->regs.mm_index > MM_DATA + 3) { + val = ati_mm_read(s, s->regs.mm_index + addr - MM_DATA, size); ++ } else { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "ati_mm_read: mm_index too small: %u\n", s->regs.mm_index); + } + break; + case BIOS_0_SCRATCH ... BUS_CNTL - 1: +@@ -472,8 +475,11 @@ static void ati_mm_write(void *opaque, hwaddr addr, + if (idx <= s->vga.vram_size - size) { + stn_le_p(s->vga.vram_ptr + idx, size, data); + } +- } else { ++ } else if (s->regs.mm_index > MM_DATA + 3) { + ati_mm_write(s, s->regs.mm_index + addr - MM_DATA, data, size); ++ } else { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "ati_mm_write: mm_index too small: %u\n", s->regs.mm_index); + } + break; + case BIOS_0_SCRATCH ... 
BUS_CNTL - 1: +-- +2.25.1 + diff --git a/audio-fix-integer-overflow.patch b/audio-fix-integer-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..91f5280f1854634460e43b48ae98a4f5eb57b26c --- /dev/null +++ b/audio-fix-integer-overflow.patch @@ -0,0 +1,37 @@ +From d0c4e8cc25dc3bfed1659c35fb59b2f0418ba1d5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Volker=20R=C3=BCmelin?= +Date: Thu, 19 Dec 2019 21:34:05 +0100 +Subject: [PATCH 2/8] audio: fix integer overflow +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Tell the compiler to do a 32bit * 32bit -> 64bit multiplication +because period_ticks is a 64bit variable. The overflow occurs +for audio timer periods larger than 4294967us. + +Fixes: be1092afa0 "audio: fix audio timer rate conversion bug" + +Signed-off-by: Volker Rümelin +Message-id: 8893a235-66a8-8fbe-7d95-862e29da90b1@t-online.de +Signed-off-by: Gerd Hoffmann +--- + audio/audio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/audio/audio.c b/audio/audio.c +index 05adf7f..efcb5d4 100644 +--- a/audio/audio.c ++++ b/audio/audio.c +@@ -1473,7 +1473,7 @@ static int audio_init(Audiodev *dev) + if (dev->timer_period <= 0) { + s->period_ticks = 1; + } else { +- s->period_ticks = dev->timer_period * SCALE_US; ++ s->period_ticks = dev->timer_period * (int64_t)SCALE_US; + } + + e = qemu_add_vm_change_state_handler (audio_vm_change_state_handler, s); +-- +1.8.3.1 + diff --git a/backup-Improve-error-for-bdrv_getlength-failure.patch b/backup-Improve-error-for-bdrv_getlength-failure.patch new file mode 100644 index 0000000000000000000000000000000000000000..df188942c913062b499c1d6579556784661b2985 --- /dev/null +++ b/backup-Improve-error-for-bdrv_getlength-failure.patch @@ -0,0 +1,51 @@ +From 0b66aef5389d622434128fc7db9abd2cd4724b51 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:19 +0100 +Subject: [PATCH] backup: Improve error for bdrv_getlength() 
failure + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-6-kwolf@redhat.com> +Patchwork-id: 97103 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 05/11] backup: Improve error for bdrv_getlength() failure +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +bdrv_get_device_name() will be an empty string with modern management +tools that don't use -drive. Use bdrv_get_device_or_node_name() instead +so that the node name is used if the BlockBackend is anonymous. + +While at it, start with upper case to make the message consistent with +the rest of the function. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Alberto Garcia +Message-Id: <20200430142755.315494-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 58226634c4b02af7b10862f7fbd3610a344bfb7f) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/backup.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 8761f1f9a7..88354dcb32 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -613,8 +613,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + + len = bdrv_getlength(bs); + if (len < 0) { +- error_setg_errno(errp, -len, "unable to get length for '%s'", +- bdrv_get_device_name(bs)); ++ error_setg_errno(errp, -len, "Unable to get length for '%s'", ++ bdrv_get_device_or_node_name(bs)); + goto error; + } + +-- +2.27.0 + diff --git a/bios-tables-test-prepare-to-change-ARM-virt-ACPI-DSDT.patch b/bios-tables-test-prepare-to-change-ARM-virt-ACPI-DSDT.patch new file mode 100644 index 0000000000000000000000000000000000000000..1dc656892b5f124d3ad732aed9c31b0f71a3363b --- /dev/null +++ b/bios-tables-test-prepare-to-change-ARM-virt-ACPI-DSDT.patch @@ -0,0 +1,24 @@ +From 2892a4b1f7dfc75e06d0ce770d44a062b6334eb0 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 15 Apr 2020 17:03:54 +0800 
+Subject: [PATCH] bios-tables-test: prepare to change ARM virt ACPI DSDT + +We will change ARM virt ACPI DSDT table in order to add the cpufreq device, +which use ACPI CPPC to show CPU frequency info to guest. + +Signed-off-by: Ying Fang +--- + tests/bios-tables-test-allowed-diff.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h +index dfb8523c..32a401ae 100644 +--- a/tests/bios-tables-test-allowed-diff.h ++++ b/tests/bios-tables-test-allowed-diff.h +@@ -1 +1,4 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/virt/DSDT", ++"tests/data/acpi/virt/DSDT.memhp", ++"tests/data/acpi/virt/DSDT.numamem", +-- +2.23.0 diff --git a/block-Add-bdrv_co_get_self_request.patch b/block-Add-bdrv_co_get_self_request.patch new file mode 100644 index 0000000000000000000000000000000000000000..4972f084649f70253978ad8fb1d3842bbf741d81 --- /dev/null +++ b/block-Add-bdrv_co_get_self_request.patch @@ -0,0 +1,59 @@ +From d9b88f7e0d56feb4d7daa2506e2756fc48e975a1 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 1 Nov 2019 16:25:09 +0100 +Subject: [PATCH] block: Add bdrv_co_get_self_request() + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191101152510.11719-3-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit c28107e9e55b11cd35cf3dc2505e3e69d10dcf13) +Signed-off-by: Michael Roth +--- + block/io.c | 18 ++++++++++++++++++ + include/block/block_int.h | 1 + + 2 files changed, 19 insertions(+) + +diff --git a/block/io.c b/block/io.c +index d4ceaaa2ce..65b5102714 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -721,6 +721,24 @@ static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req) + (req->bytes == req->overlap_bytes); + } + ++/** ++ * Return the tracked request on @bs for the current coroutine, or ++ * NULL if there is none. 
++ */ ++BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) ++{ ++ BdrvTrackedRequest *req; ++ Coroutine *self = qemu_coroutine_self(); ++ ++ QLIST_FOREACH(req, &bs->tracked_requests, list) { ++ if (req->co == self) { ++ return req; ++ } ++ } ++ ++ return NULL; ++} ++ + /** + * Round a region to cluster boundaries + */ +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 4465b02242..05ee6b4866 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -964,6 +964,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); + + bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); + void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); ++BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); + + int get_tmp_filename(char *filename, int size); + BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, +-- +2.23.0 diff --git a/block-Add-error-retry-param-setting.patch b/block-Add-error-retry-param-setting.patch new file mode 100644 index 0000000000000000000000000000000000000000..72f214b1283635a32308e67ba095ff981368ea2e --- /dev/null +++ b/block-Add-error-retry-param-setting.patch @@ -0,0 +1,226 @@ +From 3464a135565d718d0fedadd67081a0f76d81a9c6 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:50 +0800 +Subject: [PATCH] block: Add error retry param setting + +Add "retry_interval" and "retry_timeout" parameter for drive and device +option. These parameter are valid only when werror/rerror=retry. + +eg. 
--drive file=image,rerror=retry,retry_interval=1000,retry_timeout=5000 + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 13 +++++++-- + blockdev.c | 50 ++++++++++++++++++++++++++++++++++ + hw/block/block.c | 10 +++++++ + include/hw/block/block.h | 7 ++++- + include/sysemu/block-backend.h | 5 ++++ + 5 files changed, 81 insertions(+), 4 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 0fe99ffe52..2d812e2254 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -31,9 +31,6 @@ + + static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); + +-/* block backend default retry interval */ +-#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 +- + typedef struct BlockBackendAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); +@@ -1633,6 +1630,16 @@ void blk_drain_all(void) + bdrv_drain_all_end(); + } + ++void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval) ++{ ++ blk->retry_interval = interval; ++} ++ ++void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout) ++{ ++ blk->retry_timeout = timeout; ++} ++ + static bool blk_error_retry_timeout(BlockBackend *blk) + { + /* No timeout set, infinite retries. 
*/ +diff --git a/blockdev.c b/blockdev.c +index 0f49fd290e..99c92b96d2 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -470,6 +470,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + const char *buf; + int bdrv_flags = 0; + int on_read_error, on_write_error; ++ int64_t retry_interval, retry_timeout; + bool account_invalid, account_failed; + bool writethrough, read_only; + BlockBackend *blk; +@@ -565,6 +566,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + } + } + ++ retry_interval = qemu_opt_get_number(opts, "retry_interval", ++ BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); ++ retry_timeout = qemu_opt_get_number(opts, "retry_timeout", 0); ++ + if (snapshot) { + bdrv_flags |= BDRV_O_SNAPSHOT; + } +@@ -629,6 +634,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + + blk_set_enable_write_cache(blk, !writethrough); + blk_set_on_error(blk, on_read_error, on_write_error); ++ if (on_read_error == BLOCKDEV_ON_ERROR_RETRY || ++ on_write_error == BLOCKDEV_ON_ERROR_RETRY) { ++ blk_set_on_error_retry_interval(blk, retry_interval); ++ blk_set_on_error_retry_timeout(blk, retry_timeout); ++ } + + if (!monitor_add_blk(blk, id, errp)) { + blk_unref(blk); +@@ -754,6 +764,14 @@ QemuOptsList qemu_legacy_drive_opts = { + .name = "werror", + .type = QEMU_OPT_STRING, + .help = "write error action", ++ },{ ++ .name = "retry_interval", ++ .type = QEMU_OPT_NUMBER, ++ .help = "interval for retry action in millisecond", ++ },{ ++ .name = "retry_timeout", ++ .type = QEMU_OPT_NUMBER, ++ .help = "timeout for retry action in millisecond", + },{ + .name = "copy-on-read", + .type = QEMU_OPT_BOOL, +@@ -776,6 +794,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, + BlockInterfaceType type; + int max_devs, bus_id, unit_id, index; + const char *werror, *rerror; ++ int64_t retry_interval, retry_timeout; + bool read_only = false; + bool copy_on_read; + const char *filename; +@@ -992,6 +1011,29 @@ 
DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, + qdict_put_str(bs_opts, "rerror", rerror); + } + ++ if (qemu_opt_find(legacy_opts, "retry_interval")) { ++ if ((werror == NULL || strcmp(werror, "retry")) && ++ (rerror == NULL || strcmp(rerror, "retry"))) { ++ error_setg(errp, "retry_interval is only supported " ++ "by werror/rerror=retry"); ++ goto fail; ++ } ++ retry_interval = qemu_opt_get_number(legacy_opts, "retry_interval", ++ BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); ++ qdict_put_int(bs_opts, "retry_interval", retry_interval); ++ } ++ ++ if (qemu_opt_find(legacy_opts, "retry_timeout")) { ++ if ((werror == NULL || strcmp(werror, "retry")) && ++ (rerror == NULL || strcmp(rerror, "retry"))) { ++ error_setg(errp, "retry_timeout is only supported " ++ "by werror/rerror=retry"); ++ goto fail; ++ } ++ retry_timeout = qemu_opt_get_number(legacy_opts, "retry_timeout", 0); ++ qdict_put_int(bs_opts, "retry_timeout", retry_timeout); ++ } ++ + /* Actual block device init: Functionality shared with blockdev-add */ + blk = blockdev_init(filename, bs_opts, &local_err); + bs_opts = NULL; +@@ -4593,6 +4635,14 @@ QemuOptsList qemu_common_drive_opts = { + .name = "werror", + .type = QEMU_OPT_STRING, + .help = "write error action", ++ },{ ++ .name = "retry_interval", ++ .type = QEMU_OPT_NUMBER, ++ .help = "interval for retry action in millisecond", ++ },{ ++ .name = "retry_timeout", ++ .type = QEMU_OPT_NUMBER, ++ .help = "timeout for retry action in millisecond", + },{ + .name = BDRV_OPT_READ_ONLY, + .type = QEMU_OPT_BOOL, +diff --git a/hw/block/block.c b/hw/block/block.c +index bf56c7612b..56141940ca 100644 +--- a/hw/block/block.c ++++ b/hw/block/block.c +@@ -134,6 +134,16 @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, + blk_set_enable_write_cache(blk, wce); + blk_set_on_error(blk, rerror, werror); + ++ if (rerror == BLOCKDEV_ON_ERROR_RETRY || ++ werror == BLOCKDEV_ON_ERROR_RETRY) { ++ if (conf->retry_interval >= 0) { ++ 
blk_set_on_error_retry_interval(blk, conf->retry_interval); ++ } ++ if (conf->retry_timeout >= 0) { ++ blk_set_on_error_retry_timeout(blk, conf->retry_timeout); ++ } ++ } ++ + return true; + } + +diff --git a/include/hw/block/block.h b/include/hw/block/block.h +index 607539057a..d12603aabd 100644 +--- a/include/hw/block/block.h ++++ b/include/hw/block/block.h +@@ -30,6 +30,8 @@ typedef struct BlockConf { + bool share_rw; + BlockdevOnError rerror; + BlockdevOnError werror; ++ int64_t retry_interval; ++ int64_t retry_timeout; + } BlockConf; + + static inline unsigned int get_physical_block_exp(BlockConf *conf) +@@ -71,7 +73,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) + DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror, \ + BLOCKDEV_ON_ERROR_AUTO), \ + DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ +- BLOCKDEV_ON_ERROR_AUTO) ++ BLOCKDEV_ON_ERROR_AUTO), \ ++ DEFINE_PROP_INT64("retry_interval", _state, _conf.retry_interval, \ ++ -1), \ ++ DEFINE_PROP_INT64("retry_timeout", _state, _conf.retry_timeout, -1) + + /* Backend access helpers */ + +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index 58dde446ca..dc10e507ae 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -25,6 +25,9 @@ + */ + #include "block/block.h" + ++/* block backend default retry interval */ ++#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 ++ + /* Callbacks for block device models */ + typedef struct BlockDevOps { + /* +@@ -184,6 +187,8 @@ void blk_inc_in_flight(BlockBackend *blk); + void blk_dec_in_flight(BlockBackend *blk); + void blk_drain(BlockBackend *blk); + void blk_drain_all(void); ++void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval); ++void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout); + void blk_error_retry_reset_timeout(BlockBackend *blk); + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + 
BlockdevOnError on_write_error); +-- +2.27.0 + diff --git a/block-Add-sanity-check-when-setting-retry-parameters.patch b/block-Add-sanity-check-when-setting-retry-parameters.patch new file mode 100644 index 0000000000000000000000000000000000000000..0af7b6e14e2be698821105d386c4c39893c67a83 --- /dev/null +++ b/block-Add-sanity-check-when-setting-retry-parameters.patch @@ -0,0 +1,118 @@ +From 6642b2c6fcad2e1099c61b56f4fe78f3180d005e Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 18 Mar 2021 19:45:11 +0800 +Subject: [PATCH] block: Add sanity check when setting retry parameters + +Add sanity check when setting retry parameters to avoid invalid retry +configuration. + +Signed-off-by: Jiahui Cen +--- + hw/core/qdev-properties.c | 45 ++++++++++++++++++++++++++++++++++++ + include/hw/block/block.h | 7 +++--- + include/hw/qdev-properties.h | 8 +++++++ + 3 files changed, 57 insertions(+), 3 deletions(-) + +diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c +index 709f9e0f9d..2601091f8f 100644 +--- a/hw/core/qdev-properties.c ++++ b/hw/core/qdev-properties.c +@@ -628,6 +628,51 @@ const PropertyInfo qdev_prop_blockdev_on_error = { + .set_default_value = set_default_value_enum, + }; + ++static void set_retry_time(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ DeviceState *dev = DEVICE(obj); ++ Property *prop = opaque; ++ int64_t value, *ptr = qdev_get_prop_ptr(dev, prop); ++ Error *local_err = NULL; ++ ++ if (dev->realized) { ++ qdev_prop_set_after_realize(dev, name, errp); ++ return; ++ } ++ ++ visit_type_int64(v, name, &value, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ /* value should not be negative */ ++ if (value < 0) { ++ error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, ++ dev->id ? 
: "", name, (int64_t)value, 0L, LONG_MAX); ++ return; ++ } ++ ++ *ptr = value; ++} ++ ++const PropertyInfo qdev_prop_blockdev_retry_interval = { ++ .name = "BlockdevRetryInterval", ++ .description = "Interval for retry error handling policy", ++ .get = get_int64, ++ .set = set_retry_time, ++ .set_default_value = set_default_value_int, ++}; ++ ++const PropertyInfo qdev_prop_blockdev_retry_timeout = { ++ .name = "BlockdevRetryTimeout", ++ .description = "Timeout for retry error handling policy", ++ .get = get_int64, ++ .set = set_retry_time, ++ .set_default_value = set_default_value_int, ++}; ++ + /* --- BIOS CHS translation */ + + QEMU_BUILD_BUG_ON(sizeof(BiosAtaTranslation) != sizeof(int)); +diff --git a/include/hw/block/block.h b/include/hw/block/block.h +index d12603aabd..c5276fec0d 100644 +--- a/include/hw/block/block.h ++++ b/include/hw/block/block.h +@@ -74,9 +74,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) + BLOCKDEV_ON_ERROR_AUTO), \ + DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ + BLOCKDEV_ON_ERROR_AUTO), \ +- DEFINE_PROP_INT64("retry_interval", _state, _conf.retry_interval, \ +- -1), \ +- DEFINE_PROP_INT64("retry_timeout", _state, _conf.retry_timeout, -1) ++ DEFINE_PROP_BLOCKDEV_RETRY_INTERVAL("retry_interval", _state, \ ++ _conf.retry_interval, 1000), \ ++ DEFINE_PROP_BLOCKDEV_RETRY_TIMEOUT("retry_timeout", _state, \ ++ _conf.retry_timeout, 0) + + /* Backend access helpers */ + +diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h +index a22a532eb8..d7742be3bc 100644 +--- a/include/hw/qdev-properties.h ++++ b/include/hw/qdev-properties.h +@@ -26,6 +26,8 @@ extern const PropertyInfo qdev_prop_on_off_auto; + extern const PropertyInfo qdev_prop_compress_method; + extern const PropertyInfo qdev_prop_losttickpolicy; + extern const PropertyInfo qdev_prop_blockdev_on_error; ++extern const PropertyInfo qdev_prop_blockdev_retry_interval; ++extern const PropertyInfo qdev_prop_blockdev_retry_timeout; 
+ extern const PropertyInfo qdev_prop_bios_chs_trans; + extern const PropertyInfo qdev_prop_fdc_drive_type; + extern const PropertyInfo qdev_prop_drive; +@@ -215,6 +217,12 @@ extern const PropertyInfo qdev_prop_pcie_link_width; + #define DEFINE_PROP_BLOCKDEV_ON_ERROR(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_on_error, \ + BlockdevOnError) ++#define DEFINE_PROP_BLOCKDEV_RETRY_INTERVAL(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_retry_interval, \ ++ int64_t) ++#define DEFINE_PROP_BLOCKDEV_RETRY_TIMEOUT(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_retry_timeout, \ ++ int64_t) + #define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int) + #define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \ +-- +2.27.0 + diff --git a/block-Avoid-memleak-on-qcow2-image-info-failure.patch b/block-Avoid-memleak-on-qcow2-image-info-failure.patch new file mode 100644 index 0000000000000000000000000000000000000000..13917f5b61ed267f584feac9041450e6fe9bbca6 --- /dev/null +++ b/block-Avoid-memleak-on-qcow2-image-info-failure.patch @@ -0,0 +1,35 @@ +From 6a39af8880c18fb3bcbfb715aef909c64286524e Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 20 Mar 2020 13:36:20 -0500 +Subject: [PATCH 04/14] block: Avoid memleak on qcow2 image info failure + +If we fail to get bitmap info, we must not leak the encryption info. 
+ +Fixes: b8968c875f403 +Fixes: Coverity CID 1421894 +Signed-off-by: Eric Blake +Message-Id: <20200320183620.1112123-1-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Andrey Shinkevich +Tested-by: Andrey Shinkevich +Signed-off-by: Max Reitz +Signed-off-by: Peng Liang +--- + block/qcow2.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 27c54b9905aa..0f4b0940d457 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -4588,6 +4588,7 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs, + if (local_err) { + error_propagate(errp, local_err); + qapi_free_ImageInfoSpecific(spec_info); ++ qapi_free_QCryptoBlockInfo(encrypt_info); + return NULL; + } + *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ +-- +2.26.2 + diff --git a/block-Call-attention-to-truncation-of-long-NBD-expor.patch b/block-Call-attention-to-truncation-of-long-NBD-expor.patch new file mode 100644 index 0000000000000000000000000000000000000000..91745acf1dd03b8186fd70e5a538014727c7099a --- /dev/null +++ b/block-Call-attention-to-truncation-of-long-NBD-expor.patch @@ -0,0 +1,105 @@ +From e94c1625c0f8155740b1bb7b2c749df759e04526 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 10 Jun 2020 18:32:02 -0400 +Subject: [PATCH] block: Call attention to truncation of long NBD exports + +RH-Author: Eric Blake +Message-id: <20200610183202.3780750-3-eblake@redhat.com> +Patchwork-id: 97495 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] block: Call attention to truncation of long NBD exports +Bugzilla: 1845384 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Commit 93676c88 relaxed our NBD client code to request export names up +to the NBD protocol maximum of 4096 bytes without NUL terminator, even +though the block layer can't store anything longer than 4096 bytes +including NUL terminator for display to the user. 
Since this means +there are some export names where we have to truncate things, we can +at least try to make the truncation a bit more obvious for the user. +Note that in spite of the truncated display name, we can still +communicate with an NBD server using such a long export name; this was +deemed nicer than refusing to even connect to such a server (since the +server may not be under our control, and since determining our actual +length limits gets tricky when nbd://host:port/export and +nbd+unix:///export?socket=/path are themselves variable-length +expansions beyond the export name but count towards the block layer +name length). + +Reported-by: Xueqiang Wei +Fixes: https://bugzilla.redhat.com/1843684 +Signed-off-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200610163741.3745251-3-eblake@redhat.com> +(cherry picked from commit 5c86bdf1208916ece0b87e1151c9b48ee54faa3e) +Signed-off-by: Eric Blake +Signed-off-by: Eduardo Lima (Etrunko) +--- + block.c | 7 +++++-- + block/nbd.c | 21 +++++++++++++-------- + 2 files changed, 18 insertions(+), 10 deletions(-) + +diff --git a/block.c b/block.c +index 38880eabf8..ba36b53a00 100644 +--- a/block.c ++++ b/block.c +@@ -6444,8 +6444,11 @@ void bdrv_refresh_filename(BlockDriverState *bs) + pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename); + } else { + QString *json = qobject_to_json(QOBJECT(bs->full_open_options)); +- snprintf(bs->filename, sizeof(bs->filename), "json:%s", +- qstring_get_str(json)); ++ if (snprintf(bs->filename, sizeof(bs->filename), "json:%s", ++ qstring_get_str(json)) >= sizeof(bs->filename)) { ++ /* Give user a hint if we truncated things. 
*/ ++ strcpy(bs->filename + sizeof(bs->filename) - 4, "..."); ++ } + qobject_unref(json); + } + } +diff --git a/block/nbd.c b/block/nbd.c +index 3977b1efc7..63cdd051ab 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -1714,6 +1714,7 @@ static void nbd_refresh_filename(BlockDriverState *bs) + { + BDRVNBDState *s = bs->opaque; + const char *host = NULL, *port = NULL, *path = NULL; ++ size_t len = 0; + + if (s->saddr->type == SOCKET_ADDRESS_TYPE_INET) { + const InetSocketAddress *inet = &s->saddr->u.inet; +@@ -1726,17 +1727,21 @@ static void nbd_refresh_filename(BlockDriverState *bs) + } /* else can't represent as pseudo-filename */ + + if (path && s->export) { +- snprintf(bs->exact_filename, sizeof(bs->exact_filename), +- "nbd+unix:///%s?socket=%s", s->export, path); ++ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), ++ "nbd+unix:///%s?socket=%s", s->export, path); + } else if (path && !s->export) { +- snprintf(bs->exact_filename, sizeof(bs->exact_filename), +- "nbd+unix://?socket=%s", path); ++ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), ++ "nbd+unix://?socket=%s", path); + } else if (host && s->export) { +- snprintf(bs->exact_filename, sizeof(bs->exact_filename), +- "nbd://%s:%s/%s", host, port, s->export); ++ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), ++ "nbd://%s:%s/%s", host, port, s->export); + } else if (host && !s->export) { +- snprintf(bs->exact_filename, sizeof(bs->exact_filename), +- "nbd://%s:%s", host, port); ++ len = snprintf(bs->exact_filename, sizeof(bs->exact_filename), ++ "nbd://%s:%s", host, port); ++ } ++ if (len >= sizeof(bs->exact_filename)) { ++ /* Name is too long to represent exactly, so leave it empty. 
*/ ++ bs->exact_filename[0] = '\0'; + } + } + +-- +2.27.0 + diff --git a/block-Fix-cross-AioContext-blockdev-snapshot.patch b/block-Fix-cross-AioContext-blockdev-snapshot.patch new file mode 100644 index 0000000000000000000000000000000000000000..a4a4d9dbb4c51b74a8258b6368bd9a9ca88b71c6 --- /dev/null +++ b/block-Fix-cross-AioContext-blockdev-snapshot.patch @@ -0,0 +1,78 @@ +From ec96b9f64c239736003413d70dc3999ad0b8271c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 Mar 2020 12:38:29 +0100 +Subject: [PATCH] block: Fix cross-AioContext blockdev-snapshot + +external_snapshot_prepare() tries to move the overlay to the AioContext +of the backing file (the snapshotted node). However, it's possible that +this doesn't work, but the backing file can instead be moved to the +overlay's AioContext (e.g. opening the backing chain for a mirror +target). + +bdrv_append() already indirectly uses bdrv_attach_node(), which takes +care to move nodes to make sure they use the same AioContext and which +tries both directions. + +So the problem has a simple fix: Just delete the unnecessary extra +bdrv_try_set_aio_context() call in external_snapshot_prepare() and +instead assert in bdrv_append() that both nodes were indeed moved to the +same AioContext. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20200310113831.27293-6-kwolf@redhat.com> +Tested-by: Peter Krempa +Signed-off-by: Kevin Wolf +--- + block.c | 1 + + blockdev.c | 16 ---------------- + 2 files changed, 1 insertion(+), 16 deletions(-) + +diff --git a/block.c b/block.c +index ba36b53a00..824025f781 100644 +--- a/block.c ++++ b/block.c +@@ -4165,6 +4165,7 @@ void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, + bdrv_ref(from); + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); ++ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to)); + bdrv_drained_begin(from); + + /* Put all parents into @list and calculate their cumulative permissions */ +diff --git a/blockdev.c b/blockdev.c +index 79112be2e6..d1a3b6a630 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1578,8 +1578,6 @@ static void external_snapshot_prepare(BlkActionState *common, + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; +- AioContext *old_context; +- int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar + * purpose but a different set of parameters */ +@@ -1719,20 +1717,6 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + +- /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ +- old_context = bdrv_get_aio_context(state->new_bs); +- aio_context_release(aio_context); +- aio_context_acquire(old_context); +- +- ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); +- +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- +- if (ret < 0) { +- goto out; +- } +- + /* This removes our old bs and adds the new bs. This is an operation that + * can fail, so we need to do it in .prepare; undoing it for abort is + * always possible. 
*/ +-- +2.27.0 + diff --git a/block-Make-wait-mark-serialising-requests-public.patch b/block-Make-wait-mark-serialising-requests-public.patch new file mode 100644 index 0000000000000000000000000000000000000000..162463c7769093014562846d9d7c0da4e131b5e3 --- /dev/null +++ b/block-Make-wait-mark-serialising-requests-public.patch @@ -0,0 +1,131 @@ +From 590cff8230749794ba09b38f3ea4eb6b0f2f73b5 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 1 Nov 2019 16:25:08 +0100 +Subject: [PATCH] block: Make wait/mark serialising requests public + +Make both bdrv_mark_request_serialising() and +bdrv_wait_serialising_requests() public so they can be used from block +drivers. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191101152510.11719-2-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 304d9d7f034ff7f5e1e66a65b7f720f63a72c57e) + Conflicts: + block/io.c +*drop context dependency on 1acc3466a2 +Signed-off-by: Michael Roth +--- + block/io.c | 24 ++++++++++++------------ + include/block/block_int.h | 3 +++ + 2 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 07d2d825c3..d4ceaaa2ce 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -694,7 +694,7 @@ static void tracked_request_begin(BdrvTrackedRequest *req, + qemu_co_mutex_unlock(&bs->reqs_lock); + } + +-static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) ++void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) + { + int64_t overlap_offset = req->offset & ~(align - 1); + uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align) +@@ -784,7 +784,7 @@ void bdrv_dec_in_flight(BlockDriverState *bs) + bdrv_wakeup(bs); + } + +-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) ++bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self) + { + BlockDriverState *bs = self->bs; + BdrvTrackedRequest *req; +@@ -1340,14 +1340,14 @@ static int coroutine_fn 
bdrv_aligned_preadv(BdrvChild *child, + * with each other for the same cluster. For example, in copy-on-read + * it ensures that the CoR read and write operations are atomic and + * guest writes cannot interleave between them. */ +- mark_request_serialising(req, bdrv_get_cluster_size(bs)); ++ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); + } + + /* BDRV_REQ_SERIALISING is only for write operation */ + assert(!(flags & BDRV_REQ_SERIALISING)); + + if (!(flags & BDRV_REQ_NO_SERIALISING)) { +- wait_serialising_requests(req); ++ bdrv_wait_serialising_requests(req); + } + + if (flags & BDRV_REQ_COPY_ON_READ) { +@@ -1736,10 +1736,10 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes, + assert(!(flags & ~BDRV_REQ_MASK)); + + if (flags & BDRV_REQ_SERIALISING) { +- mark_request_serialising(req, bdrv_get_cluster_size(bs)); ++ bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); + } + +- waited = wait_serialising_requests(req); ++ waited = bdrv_wait_serialising_requests(req); + + assert(!waited || !req->serialising || + is_request_serialising_and_aligned(req)); +@@ -1905,8 +1905,8 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + + padding = bdrv_init_padding(bs, offset, bytes, &pad); + if (padding) { +- mark_request_serialising(req, align); +- wait_serialising_requests(req); ++ bdrv_mark_request_serialising(req, align); ++ bdrv_wait_serialising_requests(req); + + bdrv_padding_rmw_read(child, req, &pad, true); + +@@ -1993,8 +1993,8 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + } + + if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) { +- mark_request_serialising(&req, align); +- wait_serialising_requests(&req); ++ bdrv_mark_request_serialising(&req, align); ++ bdrv_wait_serialising_requests(&req); + bdrv_padding_rmw_read(child, &req, &pad, false); + } + +@@ -3078,7 +3078,7 @@ static int coroutine_fn bdrv_co_copy_range_internal( + /* BDRV_REQ_SERIALISING is only for write operation */ + 
assert(!(read_flags & BDRV_REQ_SERIALISING)); + if (!(read_flags & BDRV_REQ_NO_SERIALISING)) { +- wait_serialising_requests(&req); ++ bdrv_wait_serialising_requests(&req); + } + + ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, +@@ -3205,7 +3205,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, + * new area, we need to make sure that no write requests are made to it + * concurrently or they might be overwritten by preallocation. */ + if (new_bytes) { +- mark_request_serialising(&req, 1); ++ bdrv_mark_request_serialising(&req, 1); + } + if (bs->read_only) { + error_setg(errp, "Image is read-only"); +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 3aa1e832a8..4465b02242 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -962,6 +962,9 @@ extern unsigned int bdrv_drain_all_count; + void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); + void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); + ++bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); ++void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); ++ + int get_tmp_filename(char *filename, int size); + BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, + const char *filename); +-- +2.23.0 diff --git a/block-Remove-unused-include.patch b/block-Remove-unused-include.patch new file mode 100644 index 0000000000000000000000000000000000000000..f643ebc66f0f631e949412b706c3210e9d3aead1 --- /dev/null +++ b/block-Remove-unused-include.patch @@ -0,0 +1,31 @@ +From b353d059bddf4b211c2560e7c123f874ed5c8cf6 Mon Sep 17 00:00:00 2001 +From: AlexChen +Date: Wed, 21 Oct 2020 17:12:52 +0800 +Subject: [PATCH] block: Remove unused include + +The "qemu-common.h" include is not used, remove it. 
+ +Reported-by: Euler Robot +Signed-off-by: AlexChen +Message-Id: <5F8FFB94.3030209@huawei.com> +Signed-off-by: Max Reitz +(cherry-picked from commit 3d86af858e) +--- + block/dmg-lzfse.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/block/dmg-lzfse.c b/block/dmg-lzfse.c +index 19d25bc646..6798cf4fbf 100644 +--- a/block/dmg-lzfse.c ++++ b/block/dmg-lzfse.c +@@ -22,7 +22,6 @@ + * THE SOFTWARE. + */ + #include "qemu/osdep.h" +-#include "qemu-common.h" + #include "dmg.h" + #include + +-- +2.27.0 + diff --git a/block-backend-Add-device-specific-retry-callback.patch b/block-backend-Add-device-specific-retry-callback.patch new file mode 100644 index 0000000000000000000000000000000000000000..d50d6bc99cdd87e74838b3b2b61a2eabd1df3ed1 --- /dev/null +++ b/block-backend-Add-device-specific-retry-callback.patch @@ -0,0 +1,53 @@ +From f74edc7c8c85874691daf8801c159874ef45aae0 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:47 +0800 +Subject: [PATCH] block-backend: Add device specific retry callback + +Add retry_request_cb in BlockDevOps to do device specific retry action. +Backend's timer would be registered only when the backend is set 'retry' +on errors and the device supports retry action. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 8 ++++++++ + include/sysemu/block-backend.h | 4 ++++ + 2 files changed, 12 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index a9a43b1440..b8f535a5fd 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -958,6 +958,14 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, + blk->dev_ops = ops; + blk->dev_opaque = opaque; + ++ if ((blk->on_read_error == BLOCKDEV_ON_ERROR_RETRY || ++ blk->on_write_error == BLOCKDEV_ON_ERROR_RETRY) && ++ ops->retry_request_cb) { ++ blk->retry_timer = aio_timer_new(blk->ctx, QEMU_CLOCK_REALTIME, ++ SCALE_MS, ops->retry_request_cb, ++ opaque); ++ } ++ + /* Are we currently quiesced? 
Should we enforce this right now? */ + if (blk->quiesce_counter && ops->drained_begin) { + ops->drained_begin(opaque); +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index 733c4957eb..b58dc6bde8 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -66,6 +66,10 @@ typedef struct BlockDevOps { + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); ++ /* ++ * Runs when retrying failed requests. ++ */ ++ void (*retry_request_cb)(void *opaque); + } BlockDevOps; + + /* This struct is embedded in (the private) BlockBackend struct and contains +-- +2.27.0 + diff --git a/block-backend-Add-timeout-support-for-retry.patch b/block-backend-Add-timeout-support-for-retry.patch new file mode 100644 index 0000000000000000000000000000000000000000..ac1bc66230f533b542e0f0a31d0035f868f3c06b --- /dev/null +++ b/block-backend-Add-timeout-support-for-retry.patch @@ -0,0 +1,74 @@ +From c58269c64af18bc2a22bbef8b92e489214272429 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:49 +0800 +Subject: [PATCH] block-backend: Add timeout support for retry + +Retry should only be triggered when timeout is not reached, so let's check +timeout before retry. Device should also reset retry_start_time after +successful retry. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 25 ++++++++++++++++++++++++- + include/sysemu/block-backend.h | 1 + + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 11f8ff4301..0fe99ffe52 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1633,6 +1633,29 @@ void blk_drain_all(void) + bdrv_drain_all_end(); + } + ++static bool blk_error_retry_timeout(BlockBackend *blk) ++{ ++ /* No timeout set, infinite retries. */ ++ if (!blk->retry_timeout) { ++ return false; ++ } ++ ++ /* The first time an error occurs. 
*/ ++ if (!blk->retry_start_time) { ++ blk->retry_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ return false; ++ } ++ ++ return qemu_clock_get_ms(QEMU_CLOCK_REALTIME) > (blk->retry_start_time + ++ blk->retry_timeout); ++} ++ ++void blk_error_retry_reset_timeout(BlockBackend *blk) ++{ ++ if (blk->retry_timer && blk->retry_start_time) ++ blk->retry_start_time = 0; ++} ++ + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) + { +@@ -1661,7 +1684,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; + case BLOCKDEV_ON_ERROR_RETRY: +- return (blk->retry_timer) ? ++ return (blk->retry_timer && !blk_error_retry_timeout(blk)) ? + BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_AUTO: + default: +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index b58dc6bde8..58dde446ca 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -184,6 +184,7 @@ void blk_inc_in_flight(BlockBackend *blk); + void blk_dec_in_flight(BlockBackend *blk); + void blk_drain(BlockBackend *blk); + void blk_drain_all(void); ++void blk_error_retry_reset_timeout(BlockBackend *blk); + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); + BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); +-- +2.27.0 + diff --git a/block-backend-Enable-retry-action-on-errors.patch b/block-backend-Enable-retry-action-on-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..241f945cff9edae0f464177226b2b3ef33d98ca0 --- /dev/null +++ b/block-backend-Enable-retry-action-on-errors.patch @@ -0,0 +1,42 @@ +From 8df36cddd1e5e2b3c3598c83a70e8cbb81c26cec Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:48 +0800 +Subject: [PATCH] block-backend: Enable retry action on errors 
+ +Enable retry action when backend's retry timer is available. It would +trigger the timer to do device specific retry action. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index b8f535a5fd..11f8ff4301 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1660,6 +1660,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + return BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; ++ case BLOCKDEV_ON_ERROR_RETRY: ++ return (blk->retry_timer) ? ++ BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_AUTO: + default: + abort(); +@@ -1707,6 +1710,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, + qemu_system_vmstop_request_prepare(); + send_qmp_error_event(blk, action, is_read, error); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); ++ } else if (action == BLOCK_ERROR_ACTION_RETRY) { ++ timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + ++ blk->retry_interval); ++ send_qmp_error_event(blk, action, is_read, error); + } else { + send_qmp_error_event(blk, action, is_read, error); + } +-- +2.27.0 + diff --git a/block-backend-Introduce-retry-timer.patch b/block-backend-Introduce-retry-timer.patch new file mode 100644 index 0000000000000000000000000000000000000000..e45aacc149860baf2951e34a5b964cae7cc9aea3 --- /dev/null +++ b/block-backend-Introduce-retry-timer.patch @@ -0,0 +1,69 @@ +From 805c2e121e1ad612f63bafec458284554e76d034 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:46 +0800 +Subject: [PATCH] block-backend: Introduce retry timer + +Add a timer to regularly trigger retry on errors. 
+ +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 0056b526b8..a9a43b1440 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -31,6 +31,9 @@ + + static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); + ++/* block backend default retry interval */ ++#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 ++ + typedef struct BlockBackendAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); +@@ -88,6 +91,15 @@ struct BlockBackend { + * Accessed with atomic ops. + */ + unsigned int in_flight; ++ ++ /* Timer for retry on errors. */ ++ QEMUTimer *retry_timer; ++ /* Interval in ms to trigger next retry. */ ++ int64_t retry_interval; ++ /* Start time of the first error. Used to check timeout. */ ++ int64_t retry_start_time; ++ /* Retry timeout. 0 represents infinite retry. 
*/ ++ int64_t retry_timeout; + }; + + typedef struct BlockBackendAIOCB { +@@ -337,6 +349,11 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) + blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; + blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; + ++ blk->retry_timer = NULL; ++ blk->retry_interval = BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL; ++ blk->retry_start_time = 0; ++ blk->retry_timeout = 0; ++ + block_acct_init(&blk->stats); + + notifier_list_init(&blk->remove_bs_notifiers); +@@ -423,6 +440,10 @@ static void blk_delete(BlockBackend *blk) + QTAILQ_REMOVE(&block_backends, blk, link); + drive_info_del(blk->legacy_dinfo); + block_acct_cleanup(&blk->stats); ++ if (blk->retry_timer) { ++ timer_del(blk->retry_timer); ++ timer_free(blk->retry_timer); ++ } + g_free(blk); + } + +-- +2.27.0 + diff --git a/block-backend-Stop-retrying-when-draining.patch b/block-backend-Stop-retrying-when-draining.patch new file mode 100644 index 0000000000000000000000000000000000000000..13f3ad64d1c2d2db02d830ca67ee3f90e893e46a --- /dev/null +++ b/block-backend-Stop-retrying-when-draining.patch @@ -0,0 +1,37 @@ +From da64af4b1e92c345296d937e66136f86027d1ca2 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 25 Feb 2021 18:03:57 +0800 +Subject: [PATCH] block-backend: Stop retrying when draining + +Retrying failed requests when draining would make the draining hang. So it +is better not to trigger the retry timer when draining. And after the +virtual devices go back to work, they would retry those queued requests. 
+ +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 2d812e2254..f6c918f1d9 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1741,9 +1741,11 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, + send_qmp_error_event(blk, action, is_read, error); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); + } else if (action == BLOCK_ERROR_ACTION_RETRY) { +- timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + +- blk->retry_interval); +- send_qmp_error_event(blk, action, is_read, error); ++ if (!blk->quiesce_counter) { ++ timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + ++ blk->retry_interval); ++ send_qmp_error_event(blk, action, is_read, error); ++ } + } else { + send_qmp_error_event(blk, action, is_read, error); + } +-- +2.27.0 + diff --git a/block-backup-Add-mirror-sync-mode-bitmap.patch b/block-backup-Add-mirror-sync-mode-bitmap.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb111206baa228ef558d0a567f7d88421a66ad84 --- /dev/null +++ b/block-backup-Add-mirror-sync-mode-bitmap.patch @@ -0,0 +1,252 @@ +From e0a0150e671e8129f11aa3df907e444e91711f53 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 16:35:52 -0400 +Subject: [PATCH] block/backup: Add mirror sync mode 'bitmap' + +We don't need or want a new sync mode for simple differences in +semantics. Create a new mode simply named "BITMAP" that is designed to +make use of the new Bitmap Sync Mode field. + +Because the only bitmap sync mode is 'on-success', this adds no new +functionality to the backup job (yet). The old incremental backup mode +is maintained as a syntactic sugar for sync=bitmap, mode=on-success. + +Add all of the plumbing necessary to support this new instruction. 
+ +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190709232550.10724-6-jsnow@redhat.com +Signed-off-by: John Snow +--- + block/backup.c | 20 ++++++++++++-------- + block/mirror.c | 6 ++++-- + block/replication.c | 2 +- + blockdev.c | 25 +++++++++++++++++++++++-- + include/block/block_int.h | 4 +++- + qapi/block-core.json | 21 +++++++++++++++------ + 6 files changed, 58 insertions(+), 20 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 88354dcb32..e37eda80cd 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -38,9 +38,9 @@ typedef struct CowRequest { + typedef struct BackupBlockJob { + BlockJob common; + BlockBackend *target; +- /* bitmap for sync=incremental */ + BdrvDirtyBitmap *sync_bitmap; + MirrorSyncMode sync_mode; ++ BitmapSyncMode bitmap_mode; + BlockdevOnError on_source_error; + BlockdevOnError on_target_error; + CoRwlock flush_rwlock; +@@ -461,7 +461,7 @@ static int coroutine_fn backup_run(Job *job, Error **errp) + + job_progress_set_remaining(job, s->len); + +- if (s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { ++ if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) { + backup_incremental_init_copy_bitmap(s); + } else { + hbitmap_set(s->copy_bitmap, 0, s->len); +@@ -545,6 +545,7 @@ static int64_t backup_calculate_cluster_size(BlockDriverState *target, + BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, int64_t speed, + MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, ++ BitmapSyncMode bitmap_mode, + bool compress, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, +@@ -592,10 +593,13 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + return NULL; + } + +- if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { ++ /* QMP interface should have handled translating this to bitmap mode */ ++ assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); ++ ++ if (sync_mode == MIRROR_SYNC_MODE_BITMAP) { + if (!sync_bitmap) { + error_setg(errp, "must 
provide a valid bitmap name for " +- "\"incremental\" sync mode"); ++ "'%s' sync mode", MirrorSyncMode_str(sync_mode)); + return NULL; + } + +@@ -605,8 +609,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + } + } else if (sync_bitmap) { + error_setg(errp, +- "a sync_bitmap was provided to backup_run, " +- "but received an incompatible sync_mode (%s)", ++ "a bitmap was given to backup_job_create, " ++ "but it received an incompatible sync_mode (%s)", + MirrorSyncMode_str(sync_mode)); + return NULL; + } +@@ -648,8 +652,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + job->on_source_error = on_source_error; + job->on_target_error = on_target_error; + job->sync_mode = sync_mode; +- job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ? +- sync_bitmap : NULL; ++ job->sync_bitmap = sync_bitmap; ++ job->bitmap_mode = bitmap_mode; + job->compress = compress; + + /* Detect image-fleecing (and similar) schemes */ +diff --git a/block/mirror.c b/block/mirror.c +index abcf60a961..ccae49a28e 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -1770,8 +1770,10 @@ void mirror_start(const char *job_id, BlockDriverState *bs, + bool is_none_mode; + BlockDriverState *base; + +- if (mode == MIRROR_SYNC_MODE_INCREMENTAL) { +- error_setg(errp, "Sync mode 'incremental' not supported"); ++ if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) || ++ (mode == MIRROR_SYNC_MODE_BITMAP)) { ++ error_setg(errp, "Sync mode '%s' not supported", ++ MirrorSyncMode_str(mode)); + return; + } + is_none_mode = mode == MIRROR_SYNC_MODE_NONE; +diff --git a/block/replication.c b/block/replication.c +index 23b2993d74..936b2f8b5a 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -543,7 +543,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + + s->backup_job = backup_job_create( + NULL, s->secondary_disk->bs, s->hidden_disk->bs, +- 0, MIRROR_SYNC_MODE_NONE, NULL, false, ++ 0, MIRROR_SYNC_MODE_NONE, NULL, 0, 
false, + BLOCKDEV_ON_ERROR_REPORT, + BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL, + backup_job_completed, bs, NULL, &local_err); +diff --git a/blockdev.c b/blockdev.c +index aa15ed1f00..34c8b651e1 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3508,12 +3508,31 @@ static BlockJob *do_backup_common(BackupCommon *backup, + return NULL; + } + ++ if (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL) { ++ if (backup->has_bitmap_mode && ++ backup->bitmap_mode != BITMAP_SYNC_MODE_ON_SUCCESS) { ++ error_setg(errp, "Bitmap sync mode must be '%s' " ++ "when using sync mode '%s'", ++ BitmapSyncMode_str(BITMAP_SYNC_MODE_ON_SUCCESS), ++ MirrorSyncMode_str(backup->sync)); ++ return NULL; ++ } ++ backup->has_bitmap_mode = true; ++ backup->sync = MIRROR_SYNC_MODE_BITMAP; ++ backup->bitmap_mode = BITMAP_SYNC_MODE_ON_SUCCESS; ++ } ++ + if (backup->has_bitmap) { + bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); + if (!bmap) { + error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); + return NULL; + } ++ if (!backup->has_bitmap_mode) { ++ error_setg(errp, "Bitmap sync mode must be given " ++ "when providing a bitmap"); ++ return NULL; ++ } + if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { + return NULL; + } +@@ -3527,8 +3546,10 @@ static BlockJob *do_backup_common(BackupCommon *backup, + } + + job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, +- backup->sync, bmap, backup->compress, +- backup->on_source_error, backup->on_target_error, ++ backup->sync, bmap, backup->bitmap_mode, ++ backup->compress, ++ backup->on_source_error, ++ backup->on_target_error, + job_flags, NULL, NULL, txn, errp); + return job; + } +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 05ee6b4866..76117a761a 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1152,7 +1152,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs, + * @target: Block device to write to. 
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @sync_mode: What parts of the disk image should be copied to the destination. +- * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL. ++ * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental' ++ * @bitmap_mode: The bitmap synchronization policy to use. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @creation_flags: Flags that control the behavior of the Job lifetime. +@@ -1168,6 +1169,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, int64_t speed, + MirrorSyncMode sync_mode, + BdrvDirtyBitmap *sync_bitmap, ++ BitmapSyncMode bitmap_mode, + bool compress, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, +diff --git a/qapi/block-core.json b/qapi/block-core.json +index b8d12a4951..97baff3a8c 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1127,12 +1127,15 @@ + # + # @none: only copy data written from now on + # +-# @incremental: only copy data described by the dirty bitmap. Since: 2.4 ++# @incremental: only copy data described by the dirty bitmap. (since: 2.4) ++# ++# @bitmap: only copy data described by the dirty bitmap. (since: 4.2) ++# Behavior on completion is determined by the BitmapSyncMode. + # + # Since: 1.3 + ## + { 'enum': 'MirrorSyncMode', +- 'data': ['top', 'full', 'none', 'incremental'] } ++ 'data': ['top', 'full', 'none', 'incremental', 'bitmap'] } + + ## + # @BitmapSyncMode: +@@ -1343,9 +1346,14 @@ + # @speed: the maximum speed, in bytes per second. The default is 0, + # for unlimited. + # +-# @bitmap: the name of dirty bitmap if sync is "incremental". +-# Must be present if sync is "incremental", must NOT be present +-# otherwise. 
(Since 2.4 (drive-backup), 3.1 (blockdev-backup)) ++# @bitmap: the name of a dirty bitmap if sync is "bitmap" or "incremental". ++# Must be present if sync is "bitmap" or "incremental". ++# Must not be present otherwise. ++# (Since 2.4 (drive-backup), 3.1 (blockdev-backup)) ++# ++# @bitmap-mode: Specifies the type of data the bitmap should contain after ++# the operation concludes. Must be present if sync is "bitmap". ++# Must NOT be present otherwise. (Since 4.2) + # + # @compress: true to compress data, if the target format supports it. + # (default: false) (since 2.8) +@@ -1380,7 +1388,8 @@ + { 'struct': 'BackupCommon', + 'data': { '*job-id': 'str', 'device': 'str', + 'sync': 'MirrorSyncMode', '*speed': 'int', +- '*bitmap': 'str', '*compress': 'bool', ++ '*bitmap': 'str', '*bitmap-mode': 'BitmapSyncMode', ++ '*compress': 'bool', + '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError', + '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } +-- +2.27.0 + diff --git a/block-backup-add-never-policy-to-bitmap-sync-mode.patch b/block-backup-add-never-policy-to-bitmap-sync-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..e7a3dc356084623f89f89f9421467233e82b7dbb --- /dev/null +++ b/block-backup-add-never-policy-to-bitmap-sync-mode.patch @@ -0,0 +1,59 @@ +From 98ed0f915cf3335768ed84ee5dfa54f4e99aaf00 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 16:35:53 -0400 +Subject: [PATCH] block/backup: add 'never' policy to bitmap sync mode + +This adds a "never" policy for bitmap synchronization. Regardless of if +the job succeeds or fails, we never update the bitmap. This can be used +to perform differential backups, or simply to avoid the job modifying a +bitmap. 
+ +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190709232550.10724-7-jsnow@redhat.com +Signed-off-by: John Snow +--- + block/backup.c | 7 +++++-- + qapi/block-core.json | 5 ++++- + 2 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index e37eda80cd..84a56337ac 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -274,8 +274,11 @@ static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) + BdrvDirtyBitmap *bm; + BlockDriverState *bs = blk_bs(job->common.blk); + +- if (ret < 0) { +- /* Merge the successor back into the parent, delete nothing. */ ++ if (ret < 0 || job->bitmap_mode == BITMAP_SYNC_MODE_NEVER) { ++ /* ++ * Failure, or we don't want to synchronize the bitmap. ++ * Merge the successor back into the parent, delete nothing. ++ */ + bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL); + assert(bm); + } else { +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 97baff3a8c..48a0bfab63 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1146,10 +1146,13 @@ + # @on-success: The bitmap is only synced when the operation is successful. + # This is the behavior always used for 'INCREMENTAL' backups. + # ++# @never: The bitmap is never synchronized with the operation, and is ++# treated solely as a read-only manifest of blocks to copy. 
++# + # Since: 4.2 + ## + { 'enum': 'BitmapSyncMode', +- 'data': ['on-success'] } ++ 'data': ['on-success', 'never'] } + + ## + # @MirrorCopyMode: +-- +2.27.0 + diff --git a/block-backup-deal-with-zero-detection.patch b/block-backup-deal-with-zero-detection.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f111e58efff5bb63808d91441f4028bb743fce9 --- /dev/null +++ b/block-backup-deal-with-zero-detection.patch @@ -0,0 +1,83 @@ +From 3cf14b9a7daf0a40eb2af7a86e67cb05f6d2bea6 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 30 Jul 2019 19:32:49 +0300 +Subject: [PATCH] block/backup: deal with zero detection + +We have detect_zeroes option, so at least for blockdev-backup user +should define it if zero-detection is needed. For drive-backup leave +detection enabled by default but do it through existing option instead +of open-coding. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190730163251.755248-2-vsementsov@virtuozzo.com +Signed-off-by: John Snow +--- + block/backup.c | 15 ++++++--------- + blockdev.c | 8 ++++---- + 2 files changed, 10 insertions(+), 13 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index cc19643b47..6023573299 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -110,7 +110,10 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, + BlockBackend *blk = job->common.blk; + int nbytes; + int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0; +- int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0; ++ int write_flags = ++ (job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0) | ++ (job->compress ? 
BDRV_REQ_WRITE_COMPRESSED : 0); ++ + + assert(QEMU_IS_ALIGNED(start, job->cluster_size)); + hbitmap_reset(job->copy_bitmap, start, job->cluster_size); +@@ -128,14 +131,8 @@ static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job, + goto fail; + } + +- if (buffer_is_zero(*bounce_buffer, nbytes)) { +- ret = blk_co_pwrite_zeroes(job->target, start, +- nbytes, write_flags | BDRV_REQ_MAY_UNMAP); +- } else { +- ret = blk_co_pwrite(job->target, start, +- nbytes, *bounce_buffer, write_flags | +- (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0)); +- } ++ ret = blk_co_pwrite(job->target, start, nbytes, *bounce_buffer, ++ write_flags); + if (ret < 0) { + trace_backup_do_cow_write_fail(job, start, ret); + if (error_is_read) { +diff --git a/blockdev.c b/blockdev.c +index 0a71a15fa2..94e5aee30b 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3572,7 +3572,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + BlockDriverState *source = NULL; + BlockJob *job = NULL; + AioContext *aio_context; +- QDict *options = NULL; ++ QDict *options; + Error *local_err = NULL; + int flags; + int64_t size; +@@ -3645,10 +3645,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + goto out; + } + ++ options = qdict_new(); ++ qdict_put_str(options, "discard", "unmap"); ++ qdict_put_str(options, "detect-zeroes", "unmap"); + if (backup->format) { +- if (!options) { +- options = qdict_new(); +- } + qdict_put_str(options, "driver", backup->format); + } + +-- +2.27.0 + diff --git a/block-backup-hoist-bitmap-check-into-QMP-interface.patch b/block-backup-hoist-bitmap-check-into-QMP-interface.patch new file mode 100644 index 0000000000000000000000000000000000000000..51dc67ccbcd131c09200963de192185c0aa97671 --- /dev/null +++ b/block-backup-hoist-bitmap-check-into-QMP-interface.patch @@ -0,0 +1,73 @@ +From 9cc9e9657aad126502183fa4ceb9b962b55471cb Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 16:35:55 -0400 +Subject: [PATCH] 
block/backup: hoist bitmap check into QMP interface + +This is nicer to do in the unified QMP interface that we have now, +because it lets us use the right terminology back at the user. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190716000117.25219-5-jsnow@redhat.com +Signed-off-by: John Snow +--- + block/backup.c | 13 ++++--------- + blockdev.c | 10 ++++++++++ + 2 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/block/backup.c b/block/backup.c +index 59ac2c0396..cc19643b47 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -565,6 +565,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + assert(bs); + assert(target); + ++ /* QMP interface protects us from these cases */ ++ assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); ++ assert(sync_bitmap || sync_mode != MIRROR_SYNC_MODE_BITMAP); ++ + if (bs == target) { + error_setg(errp, "Source and target cannot be the same"); + return NULL; +@@ -596,16 +600,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + return NULL; + } + +- /* QMP interface should have handled translating this to bitmap mode */ +- assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL); +- + if (sync_mode == MIRROR_SYNC_MODE_BITMAP) { +- if (!sync_bitmap) { +- error_setg(errp, "must provide a valid bitmap name for " +- "'%s' sync mode", MirrorSyncMode_str(sync_mode)); +- return NULL; +- } +- + /* If we need to write to this bitmap, check that we can: */ + if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && + bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) { +diff --git a/blockdev.c b/blockdev.c +index efb69d343a..0a71a15fa2 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3508,6 +3508,16 @@ static BlockJob *do_backup_common(BackupCommon *backup, + return NULL; + } + ++ if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || ++ (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { ++ /* done before desugaring 'incremental' to print the right message */ ++ if 
(!backup->has_bitmap) { ++ error_setg(errp, "must provide a valid bitmap name for " ++ "'%s' sync mode", MirrorSyncMode_str(backup->sync)); ++ return NULL; ++ } ++ } ++ + if (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL) { + if (backup->has_bitmap_mode && + backup->bitmap_mode != BITMAP_SYNC_MODE_ON_SUCCESS) { +-- +2.27.0 + diff --git a/block-backup-loosen-restriction-on-readonly-bitmaps.patch b/block-backup-loosen-restriction-on-readonly-bitmaps.patch new file mode 100644 index 0000000000000000000000000000000000000000..ab0617c2b580040a460aad2bd4eb16f3e4687141 --- /dev/null +++ b/block-backup-loosen-restriction-on-readonly-bitmaps.patch @@ -0,0 +1,51 @@ +From 801e9452bc80a38ee26fe12ba42356851acd6a9e Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 16:35:54 -0400 +Subject: [PATCH] block/backup: loosen restriction on readonly bitmaps + +With the "never" sync policy, we actually can utilize readonly bitmaps +now. Loosen the check at the QMP level, and tighten it based on +provided arguments down at the job creation level instead. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190709232550.10724-19-jsnow@redhat.com +Signed-off-by: John Snow +--- + block/backup.c | 6 ++++++ + blockdev.c | 2 +- + 2 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/block/backup.c b/block/backup.c +index 84a56337ac..59ac2c0396 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -606,6 +606,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + return NULL; + } + ++ /* If we need to write to this bitmap, check that we can: */ ++ if (bitmap_mode != BITMAP_SYNC_MODE_NEVER && ++ bdrv_dirty_bitmap_check(sync_bitmap, BDRV_BITMAP_DEFAULT, errp)) { ++ return NULL; ++ } ++ + /* Create a new bitmap, and freeze/disable this one. 
*/ + if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) { + return NULL; +diff --git a/blockdev.c b/blockdev.c +index 34c8b651e1..efb69d343a 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3533,7 +3533,7 @@ static BlockJob *do_backup_common(BackupCommon *backup, + "when providing a bitmap"); + return NULL; + } +- if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { ++ if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_ALLOW_RO, errp)) { + return NULL; + } + } +-- +2.27.0 + diff --git a/block-bdrv_set_backing_bs-fix-use-after-free.patch b/block-bdrv_set_backing_bs-fix-use-after-free.patch new file mode 100644 index 0000000000000000000000000000000000000000..93ac72169d8518a8fcadc82c7ee01fcfdfcf94fc --- /dev/null +++ b/block-bdrv_set_backing_bs-fix-use-after-free.patch @@ -0,0 +1,116 @@ +From 3754525eb383f91869634766ccd041cfe40bbf17 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Mon, 16 Mar 2020 09:06:30 +0300 +Subject: [PATCH 05/14] block: bdrv_set_backing_bs: fix use-after-free +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +There is a use-after-free possible: bdrv_unref_child() leaves +bs->backing freed but not NULL. bdrv_attach_child may produce nested +polling loop due to drain, than access of freed pointer is possible. + +I've produced the following crash on 30 iotest with modified code. 
It +does not reproduce on master, but still seems possible: + + #0 __strcmp_avx2 () at /lib64/libc.so.6 + #1 bdrv_backing_overridden (bs=0x55c9d3cc2060) at block.c:6350 + #2 bdrv_refresh_filename (bs=0x55c9d3cc2060) at block.c:6404 + #3 bdrv_backing_attach (c=0x55c9d48e5520) at block.c:1063 + #4 bdrv_replace_child_noperm + (child=child@entry=0x55c9d48e5520, + new_bs=new_bs@entry=0x55c9d3cc2060) at block.c:2290 + #5 bdrv_replace_child + (child=child@entry=0x55c9d48e5520, + new_bs=new_bs@entry=0x55c9d3cc2060) at block.c:2320 + #6 bdrv_root_attach_child + (child_bs=child_bs@entry=0x55c9d3cc2060, + child_name=child_name@entry=0x55c9d241d478 "backing", + child_role=child_role@entry=0x55c9d26ecee0 , + ctx=, perm=, shared_perm=21, + opaque=0x55c9d3c5a3d0, errp=0x7ffd117108e0) at block.c:2424 + #7 bdrv_attach_child + (parent_bs=parent_bs@entry=0x55c9d3c5a3d0, + child_bs=child_bs@entry=0x55c9d3cc2060, + child_name=child_name@entry=0x55c9d241d478 "backing", + child_role=child_role@entry=0x55c9d26ecee0 , + errp=errp@entry=0x7ffd117108e0) at block.c:5876 + #8 in bdrv_set_backing_hd + (bs=bs@entry=0x55c9d3c5a3d0, + backing_hd=backing_hd@entry=0x55c9d3cc2060, + errp=errp@entry=0x7ffd117108e0) + at block.c:2576 + #9 stream_prepare (job=0x55c9d49d84a0) at block/stream.c:150 + #10 job_prepare (job=0x55c9d49d84a0) at job.c:761 + #11 job_txn_apply (txn=, fn=) at + job.c:145 + #12 job_do_finalize (job=0x55c9d49d84a0) at job.c:778 + #13 job_completed_txn_success (job=0x55c9d49d84a0) at job.c:832 + #14 job_completed (job=0x55c9d49d84a0) at job.c:845 + #15 job_completed (job=0x55c9d49d84a0) at job.c:836 + #16 job_exit (opaque=0x55c9d49d84a0) at job.c:864 + #17 aio_bh_call (bh=0x55c9d471a160) at util/async.c:117 + #18 aio_bh_poll (ctx=ctx@entry=0x55c9d3c46720) at util/async.c:117 + #19 aio_poll (ctx=ctx@entry=0x55c9d3c46720, + blocking=blocking@entry=true) + at util/aio-posix.c:728 + #20 bdrv_parent_drained_begin_single (poll=true, c=0x55c9d3d558f0) + at block/io.c:121 + #21 
bdrv_parent_drained_begin_single (c=c@entry=0x55c9d3d558f0, + poll=poll@entry=true) + at block/io.c:114 + #22 bdrv_replace_child_noperm + (child=child@entry=0x55c9d3d558f0, + new_bs=new_bs@entry=0x55c9d3d27300) at block.c:2258 + #23 bdrv_replace_child + (child=child@entry=0x55c9d3d558f0, + new_bs=new_bs@entry=0x55c9d3d27300) at block.c:2320 + #24 bdrv_root_attach_child + (child_bs=child_bs@entry=0x55c9d3d27300, + child_name=child_name@entry=0x55c9d241d478 "backing", + child_role=child_role@entry=0x55c9d26ecee0 , + ctx=, perm=, shared_perm=21, + opaque=0x55c9d3cc2060, errp=0x7ffd11710c60) at block.c:2424 + #25 bdrv_attach_child + (parent_bs=parent_bs@entry=0x55c9d3cc2060, + child_bs=child_bs@entry=0x55c9d3d27300, + child_name=child_name@entry=0x55c9d241d478 "backing", + child_role=child_role@entry=0x55c9d26ecee0 , + errp=errp@entry=0x7ffd11710c60) at block.c:5876 + #26 bdrv_set_backing_hd + (bs=bs@entry=0x55c9d3cc2060, + backing_hd=backing_hd@entry=0x55c9d3d27300, + errp=errp@entry=0x7ffd11710c60) + at block.c:2576 + #27 stream_prepare (job=0x55c9d495ead0) at block/stream.c:150 + ... 
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200316060631.30052-2-vsementsov@virtuozzo.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: John Snow +Signed-off-by: Max Reitz +Signed-off-by: Peng Liang +--- + block.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index 29e504b86aff..e834102c87f7 100644 +--- a/block.c ++++ b/block.c +@@ -2549,10 +2549,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + + if (bs->backing) { + bdrv_unref_child(bs, bs->backing); ++ bs->backing = NULL; + } + + if (!backing_hd) { +- bs->backing = NULL; + goto out; + } + +-- +2.26.2 + diff --git a/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch b/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..73d2b9d0e5e490c3f855304e692ed68fd4029468 --- /dev/null +++ b/block-create-Do-not-abort-if-a-block-driver-is-not-a.patch @@ -0,0 +1,95 @@ +From 088f1e8fd9e790bc5766bd43af134230abcff6dd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 12 Sep 2019 00:08:49 +0200 +Subject: [PATCH] block/create: Do not abort if a block driver is not available +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The 'blockdev-create' QMP command was introduced as experimental +feature in commit b0292b851b8, using the assert() debug call. +It got promoted to 'stable' command in 3fb588a0f2c, but the +assert call was not removed. + +Some block drivers are optional, and bdrv_find_format() might +return a NULL value, triggering the assertion. + +Stable code is not expected to abort, so return an error instead. + +This is easily reproducible when libnfs is not installed: + + ./configure + [...] + module support no + Block whitelist (rw) + Block whitelist (ro) + libiscsi support yes + libnfs support no + [...] 
+ +Start QEMU: + + $ qemu-system-x86_64 -S -qmp unix:/tmp/qemu.qmp,server,nowait + +Send the 'blockdev-create' with the 'nfs' driver: + + $ ( cat << 'EOF' + {'execute': 'qmp_capabilities'} + {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} + EOF + ) | socat STDIO UNIX:/tmp/qemu.qmp + {"QMP": {"version": {"qemu": {"micro": 50, "minor": 1, "major": 4}, "package": "v4.1.0-733-g89ea03a7dc"}, "capabilities": ["oob"]}} + {"return": {}} + +QEMU crashes: + + $ gdb qemu-system-x86_64 core + Program received signal SIGSEGV, Segmentation fault. + (gdb) bt + #0 0x00007ffff510957f in raise () at /lib64/libc.so.6 + #1 0x00007ffff50f3895 in abort () at /lib64/libc.so.6 + #2 0x00007ffff50f3769 in _nl_load_domain.cold.0 () at /lib64/libc.so.6 + #3 0x00007ffff5101a26 in .annobin_assert.c_end () at /lib64/libc.so.6 + #4 0x0000555555d7e1f1 in qmp_blockdev_create (job_id=0x555556baee40 "x", options=0x555557666610, errp=0x7fffffffc770) at block/create.c:69 + #5 0x0000555555c96b52 in qmp_marshal_blockdev_create (args=0x7fffdc003830, ret=0x7fffffffc7f8, errp=0x7fffffffc7f0) at qapi/qapi-commands-block-core.c:1314 + #6 0x0000555555deb0a0 in do_qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false, errp=0x7fffffffc898) at qapi/qmp-dispatch.c:131 + #7 0x0000555555deb2a1 in qmp_dispatch (cmds=0x55555645de70 , request=0x7fffdc005c70, allow_oob=false) at qapi/qmp-dispatch.c:174 + +With this patch applied, QEMU returns a QMP error: + + {'execute': 'blockdev-create', 'arguments': {'job-id': 'x', 'options': {'size': 0, 'driver': 'nfs', 'location': {'path': '/', 'server': {'host': '::1', 'type': 'inet'}}}}, 'id': 'x'} + {"id": "x", "error": {"class": "GenericError", "desc": "Block driver 'nfs' not found or not supported"}} + +Cc: qemu-stable@nongnu.org +Reported-by: Xu Tian +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Blake 
+Reviewed-by: John Snow +Signed-off-by: Kevin Wolf +(cherry picked from commit d90d5cae2b10efc0e8d0b3cc91ff16201853d3ba) +Signed-off-by: Michael Roth +--- + block/create.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/create.c b/block/create.c +index 95341219ef..de5e97bb18 100644 +--- a/block/create.c ++++ b/block/create.c +@@ -63,9 +63,13 @@ void qmp_blockdev_create(const char *job_id, BlockdevCreateOptions *options, + const char *fmt = BlockdevDriver_str(options->driver); + BlockDriver *drv = bdrv_find_format(fmt); + ++ if (!drv) { ++ error_setg(errp, "Block driver '%s' not found or not supported", fmt); ++ return; ++ } ++ + /* If the driver is in the schema, we know that it exists. But it may not + * be whitelisted. */ +- assert(drv); + if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { + error_setg(errp, "Driver is not whitelisted"); + return; +-- +2.23.0 diff --git a/block-curl-HTTP-header-field-names-are-case-insensit.patch b/block-curl-HTTP-header-field-names-are-case-insensit.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f1028d75bbb4db6b16eca9718c90d5fe2e5795e --- /dev/null +++ b/block-curl-HTTP-header-field-names-are-case-insensit.patch @@ -0,0 +1,54 @@ +From ae2c6d13c4ac625a2c6b217a7f6a17506a2b26e5 Mon Sep 17 00:00:00 2001 +From: Richard Jones +Date: Thu, 28 May 2020 14:27:37 +0100 +Subject: [PATCH] block/curl: HTTP header field names are case insensitive +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Richard Jones +Message-id: <20200528142737.17318-3-rjones@redhat.com> +Patchwork-id: 96895 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 2/2] block/curl: HTTP header field names are case insensitive +Bugzilla: 1841038 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Philippe Mathieu-Daudé + +From: David Edmondson + +RFC 7230 section 3.2 indicates that HTTP header field names are case +insensitive. 
+ +Signed-off-by: David Edmondson +Message-Id: <20200224101310.101169-3-david.edmondson@oracle.com> +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 69032253c33ae1774233c63cedf36d32242a85fc) +Signed-off-by: Danilo C. L. de Paula +--- + block/curl.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/curl.c b/block/curl.c +index bfabe7eabd..a298fcc591 100644 +--- a/block/curl.c ++++ b/block/curl.c +@@ -214,11 +214,12 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) + size_t realsize = size * nmemb; + const char *header = (char *)ptr; + const char *end = header + realsize; +- const char *accept_ranges = "Accept-Ranges:"; ++ const char *accept_ranges = "accept-ranges:"; + const char *bytes = "bytes"; + + if (realsize >= strlen(accept_ranges) +- && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) { ++ && g_ascii_strncasecmp(header, accept_ranges, ++ strlen(accept_ranges)) == 0) { + + char *p = strchr(header, ':') + 1; + +-- +2.27.0 + diff --git a/block-curl-HTTP-header-fields-allow-whitespace-aroun.patch b/block-curl-HTTP-header-fields-allow-whitespace-aroun.patch new file mode 100644 index 0000000000000000000000000000000000000000..6f3aade47ce62e588e7ce490a7a3fc8f873c49de --- /dev/null +++ b/block-curl-HTTP-header-fields-allow-whitespace-aroun.patch @@ -0,0 +1,75 @@ +From c8fd37c06fd24d1242629dda329dd16bea20f319 Mon Sep 17 00:00:00 2001 +From: Richard Jones +Date: Thu, 28 May 2020 14:27:36 +0100 +Subject: [PATCH] block/curl: HTTP header fields allow whitespace around values + +RH-Author: Richard Jones +Message-id: <20200528142737.17318-2-rjones@redhat.com> +Patchwork-id: 96894 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] block/curl: HTTP header fields allow whitespace around values +Bugzilla: 1841038 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Danilo de Paula + +From: David Edmondson + +RFC 7230 section 3.2 indicates that whitespace is permitted 
between +the field name and field value and after the field value. + +Signed-off-by: David Edmondson +Message-Id: <20200224101310.101169-2-david.edmondson@oracle.com> +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 7788a319399f17476ff1dd43164c869e320820a2) +Signed-off-by: Danilo C. L. de Paula +--- + block/curl.c | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) + +diff --git a/block/curl.c b/block/curl.c +index d4c8e94f3e..bfabe7eabd 100644 +--- a/block/curl.c ++++ b/block/curl.c +@@ -212,11 +212,34 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) + { + BDRVCURLState *s = opaque; + size_t realsize = size * nmemb; +- const char *accept_line = "Accept-Ranges: bytes"; ++ const char *header = (char *)ptr; ++ const char *end = header + realsize; ++ const char *accept_ranges = "Accept-Ranges:"; ++ const char *bytes = "bytes"; + +- if (realsize >= strlen(accept_line) +- && strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) { +- s->accept_range = true; ++ if (realsize >= strlen(accept_ranges) ++ && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) { ++ ++ char *p = strchr(header, ':') + 1; ++ ++ /* Skip whitespace between the header name and value. */ ++ while (p < end && *p && g_ascii_isspace(*p)) { ++ p++; ++ } ++ ++ if (end - p >= strlen(bytes) ++ && strncmp(p, bytes, strlen(bytes)) == 0) { ++ ++ /* Check that there is nothing but whitespace after the value. 
*/ ++ p += strlen(bytes); ++ while (p < end && *p && g_ascii_isspace(*p)) { ++ p++; ++ } ++ ++ if (p == end || !*p) { ++ s->accept_range = true; ++ } ++ } + } + + return realsize; +-- +2.27.0 + diff --git a/block-file-posix-Let-post-EOF-fallocate-serialize.patch b/block-file-posix-Let-post-EOF-fallocate-serialize.patch new file mode 100644 index 0000000000000000000000000000000000000000..bf7d34a3e54ffe24356a16185d1dfcaef603c455 --- /dev/null +++ b/block-file-posix-Let-post-EOF-fallocate-serialize.patch @@ -0,0 +1,69 @@ +From 7db05c8a732fbdc986a40aadf0de6dd23057d044 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 1 Nov 2019 16:25:10 +0100 +Subject: [PATCH] block/file-posix: Let post-EOF fallocate serialize + +The XFS kernel driver has a bug that may cause data corruption for qcow2 +images as of qemu commit c8bb23cbdbe32f. We can work around it by +treating post-EOF fallocates as serializing up until infinity (INT64_MAX +in practice). + +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191101152510.11719-4-mreitz@redhat.com +Signed-off-by: Max Reitz +(cherry picked from commit 292d06b925b2787ee6f2430996b95651cae42fce) +Signed-off-by: Michael Roth +--- + block/file-posix.c | 36 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 992eb4a798..c5df61b477 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -2623,6 +2623,42 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + RawPosixAIOData acb; + ThreadPoolFunc *handler; + ++#ifdef CONFIG_FALLOCATE ++ if (offset + bytes > bs->total_sectors * BDRV_SECTOR_SIZE) { ++ BdrvTrackedRequest *req; ++ uint64_t end; ++ ++ /* ++ * This is a workaround for a bug in the Linux XFS driver, ++ * where writes submitted through the AIO interface will be ++ * discarded if they happen beyond a concurrently running ++ * fallocate() that increases the file length (i.e., both the ++ * write and the 
fallocate() happen beyond the EOF). ++ * ++ * To work around it, we extend the tracked request for this ++ * zero write until INT64_MAX (effectively infinity), and mark ++ * it as serializing. ++ * ++ * We have to enable this workaround for all filesystems and ++ * AIO modes (not just XFS with aio=native), because for ++ * remote filesystems we do not know the host configuration. ++ */ ++ ++ req = bdrv_co_get_self_request(bs); ++ assert(req); ++ assert(req->type == BDRV_TRACKED_WRITE); ++ assert(req->offset <= offset); ++ assert(req->offset + req->bytes >= offset + bytes); ++ ++ end = INT64_MAX & -(uint64_t)bs->bl.request_alignment; ++ req->bytes = end - req->offset; ++ req->overlap_bytes = req->bytes; ++ ++ bdrv_mark_request_serialising(req, bs->bl.request_alignment); ++ bdrv_wait_serialising_requests(req); ++ } ++#endif ++ + acb = (RawPosixAIOData) { + .bs = bs, + .aio_fildes = s->fd, +-- +2.23.0 diff --git a/block-file-posix-Reduce-xfsctl-use.patch b/block-file-posix-Reduce-xfsctl-use.patch new file mode 100644 index 0000000000000000000000000000000000000000..69ceb453efac39a1fcfcc26488e04a7bb8eee0df --- /dev/null +++ b/block-file-posix-Reduce-xfsctl-use.patch @@ -0,0 +1,165 @@ +From 6f1a94035b02d3676a897ea5fa4cda4c62128228 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Fri, 23 Aug 2019 15:03:40 +0200 +Subject: [PATCH] block/file-posix: Reduce xfsctl() use +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This patch removes xfs_write_zeroes() and xfs_discard(). Both functions +have been added just before the same feature was present through +fallocate(): + +- fallocate() has supported PUNCH_HOLE for XFS since Linux 2.6.38 (March + 2011); xfs_discard() was added in December 2010. + +- fallocate() has supported ZERO_RANGE for XFS since Linux 3.15 (June + 2014); xfs_write_zeroes() was added in November 2013. + +Nowadays, all systems that qemu runs on should support both fallocate() +features (RHEL 7's kernel does). 
+ +xfsctl() is still useful for getting the request alignment for O_DIRECT, +so this patch does not remove our dependency on it completely. + +Note that xfs_write_zeroes() had a bug: It calls ftruncate() when the +file is shorter than the specified range (because ZERO_RANGE does not +increase the file length). ftruncate() may yield and then discard data +that parallel write requests have written past the EOF in the meantime. +Dropping the function altogether fixes the bug. + +Suggested-by: Paolo Bonzini +Fixes: 50ba5b2d994853b38fed10e0841b119da0f8b8e5 +Reported-by: Lukáš Doktor +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Reviewed-by: Stefano Garzarella +Reviewed-by: John Snow +Tested-by: Stefano Garzarella +Tested-by: John Snow +Signed-off-by: Kevin Wolf +(cherry picked from commit b2c6f23f4a9f6d8f1b648705cd46d3713b78d6a2) +Signed-off-by: Michael Roth +--- + block/file-posix.c | 77 +--------------------------------------------- + 1 file changed, 1 insertion(+), 76 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 4479cc7ab4..992eb4a798 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1445,59 +1445,6 @@ out: + } + } + +-#ifdef CONFIG_XFS +-static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes) +-{ +- int64_t len; +- struct xfs_flock64 fl; +- int err; +- +- len = lseek(s->fd, 0, SEEK_END); +- if (len < 0) { +- return -errno; +- } +- +- if (offset + bytes > len) { +- /* XFS_IOC_ZERO_RANGE does not increase the file length */ +- if (ftruncate(s->fd, offset + bytes) < 0) { +- return -errno; +- } +- } +- +- memset(&fl, 0, sizeof(fl)); +- fl.l_whence = SEEK_SET; +- fl.l_start = offset; +- fl.l_len = bytes; +- +- if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) { +- err = errno; +- trace_file_xfs_write_zeroes(strerror(errno)); +- return -err; +- } +- +- return 0; +-} +- +-static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) +-{ +- struct xfs_flock64 fl; +- int err; +- +- 
memset(&fl, 0, sizeof(fl)); +- fl.l_whence = SEEK_SET; +- fl.l_start = offset; +- fl.l_len = bytes; +- +- if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { +- err = errno; +- trace_file_xfs_discard(strerror(errno)); +- return -err; +- } +- +- return 0; +-} +-#endif +- + static int translate_err(int err) + { + if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || +@@ -1553,10 +1500,8 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) + static int handle_aiocb_write_zeroes(void *opaque) + { + RawPosixAIOData *aiocb = opaque; +-#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS) +- BDRVRawState *s = aiocb->bs->opaque; +-#endif + #ifdef CONFIG_FALLOCATE ++ BDRVRawState *s = aiocb->bs->opaque; + int64_t len; + #endif + +@@ -1564,12 +1509,6 @@ static int handle_aiocb_write_zeroes(void *opaque) + return handle_aiocb_write_zeroes_block(aiocb); + } + +-#ifdef CONFIG_XFS +- if (s->is_xfs) { +- return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes); +- } +-#endif +- + #ifdef CONFIG_FALLOCATE_ZERO_RANGE + if (s->has_write_zeroes) { + int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE, +@@ -1632,14 +1571,6 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque) + } + #endif + +-#ifdef CONFIG_XFS +- if (s->is_xfs) { +- /* xfs_discard() guarantees that the discarded area reads as all-zero +- * afterwards, so we can use it here. 
*/ +- return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); +- } +-#endif +- + /* If we couldn't manage to unmap while guaranteed that the area reads as + * all-zero afterwards, just write zeroes without unmapping */ + ret = handle_aiocb_write_zeroes(aiocb); +@@ -1716,12 +1647,6 @@ static int handle_aiocb_discard(void *opaque) + ret = -errno; + #endif + } else { +-#ifdef CONFIG_XFS +- if (s->is_xfs) { +- return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); +- } +-#endif +- + #ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes); +-- +2.23.0 diff --git a/block-fix-bdrv_root_attach_child-forget-to-unref-chi.patch b/block-fix-bdrv_root_attach_child-forget-to-unref-chi.patch new file mode 100644 index 0000000000000000000000000000000000000000..d901f1062659223d2899ab5520759a6a5065545a --- /dev/null +++ b/block-fix-bdrv_root_attach_child-forget-to-unref-chi.patch @@ -0,0 +1,33 @@ +From 5060ef71fa4621061101a30fa9e0d1690696c5c1 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 24 Mar 2020 18:59:21 +0300 +Subject: [PATCH 10/14] block: fix bdrv_root_attach_child forget to unref + child_bs + +bdrv_root_attach_child promises to drop child_bs reference on failure. +It does it on first handled failure path, but not on the second. Fix +that. 
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200324155921.23822-1-vsementsov@virtuozzo.com> +Signed-off-by: Kevin Wolf +Signed-off-by: Peng Liang +--- + block.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block.c b/block.c +index e834102c87f7..38880eabf801 100644 +--- a/block.c ++++ b/block.c +@@ -2399,6 +2399,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + error_propagate(errp, local_err); + g_free(child); + bdrv_abort_perm_update(child_bs); ++ bdrv_unref(child_bs); + return NULL; + } + } +-- +2.26.2 + diff --git a/block-io-refactor-padding.patch b/block-io-refactor-padding.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a267147f5b5fbfa908edf342c02bd17481b3d70 --- /dev/null +++ b/block-io-refactor-padding.patch @@ -0,0 +1,481 @@ +From 2e2ad02f2cecf419eaad0df982ceb5b41170cc7e Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 4 Jun 2019 19:15:05 +0300 +Subject: [PATCH] block/io: refactor padding + +We have similar padding code in bdrv_co_pwritev, +bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify +it. 
+ +[Squashed in Vladimir's qemu-iotests 077 fix +--Stefan] + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Acked-by: Stefan Hajnoczi +Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com +Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 7a3f542fbdfd799be4fa6f8b96dc8c1e6933fce4) +*prereq for 292d06b9 +Signed-off-by: Michael Roth +--- + block/io.c | 365 +++++++++++++++++++++++++++++------------------------ + 1 file changed, 200 insertions(+), 165 deletions(-) + +diff --git a/block/io.c b/block/io.c +index dccf687acc..07d2d825c3 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1408,28 +1408,177 @@ out: + } + + /* +- * Handle a read request in coroutine context ++ * Request padding ++ * ++ * |<---- align ----->| |<----- align ---->| ++ * |<- head ->|<------------- bytes ------------->|<-- tail -->| ++ * | | | | | | ++ * -*----------$-------*-------- ... --------*-----$------------*--- ++ * | | | | | | ++ * | offset | | end | ++ * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end) ++ * [buf ... ) [tail_buf ) ++ * ++ * @buf is an aligned allocation needed to store @head and @tail paddings. @head ++ * is placed at the beginning of @buf and @tail at the @end. ++ * ++ * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk ++ * around tail, if tail exists. ++ * ++ * @merge_reads is true for small requests, ++ * if @buf_len == @head + bytes + @tail. In this case it is possible that both ++ * head and tail exist but @buf_len == align and @tail_buf == @buf. 
++ */ ++typedef struct BdrvRequestPadding { ++ uint8_t *buf; ++ size_t buf_len; ++ uint8_t *tail_buf; ++ size_t head; ++ size_t tail; ++ bool merge_reads; ++ QEMUIOVector local_qiov; ++} BdrvRequestPadding; ++ ++static bool bdrv_init_padding(BlockDriverState *bs, ++ int64_t offset, int64_t bytes, ++ BdrvRequestPadding *pad) ++{ ++ uint64_t align = bs->bl.request_alignment; ++ size_t sum; ++ ++ memset(pad, 0, sizeof(*pad)); ++ ++ pad->head = offset & (align - 1); ++ pad->tail = ((offset + bytes) & (align - 1)); ++ if (pad->tail) { ++ pad->tail = align - pad->tail; ++ } ++ ++ if ((!pad->head && !pad->tail) || !bytes) { ++ return false; ++ } ++ ++ sum = pad->head + bytes + pad->tail; ++ pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align; ++ pad->buf = qemu_blockalign(bs, pad->buf_len); ++ pad->merge_reads = sum == pad->buf_len; ++ if (pad->tail) { ++ pad->tail_buf = pad->buf + pad->buf_len - align; ++ } ++ ++ return true; ++} ++ ++static int bdrv_padding_rmw_read(BdrvChild *child, ++ BdrvTrackedRequest *req, ++ BdrvRequestPadding *pad, ++ bool zero_middle) ++{ ++ QEMUIOVector local_qiov; ++ BlockDriverState *bs = child->bs; ++ uint64_t align = bs->bl.request_alignment; ++ int ret; ++ ++ assert(req->serialising && pad->buf); ++ ++ if (pad->head || pad->merge_reads) { ++ uint64_t bytes = pad->merge_reads ? 
pad->buf_len : align; ++ ++ qemu_iovec_init_buf(&local_qiov, pad->buf, bytes); ++ ++ if (pad->head) { ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); ++ } ++ if (pad->merge_reads && pad->tail) { ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); ++ } ++ ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes, ++ align, &local_qiov, 0); ++ if (ret < 0) { ++ return ret; ++ } ++ if (pad->head) { ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); ++ } ++ if (pad->merge_reads && pad->tail) { ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); ++ } ++ ++ if (pad->merge_reads) { ++ goto zero_mem; ++ } ++ } ++ ++ if (pad->tail) { ++ qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align); ++ ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); ++ ret = bdrv_aligned_preadv( ++ child, req, ++ req->overlap_offset + req->overlap_bytes - align, ++ align, align, &local_qiov, 0); ++ if (ret < 0) { ++ return ret; ++ } ++ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); ++ } ++ ++zero_mem: ++ if (zero_middle) { ++ memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail); ++ } ++ ++ return 0; ++} ++ ++static void bdrv_padding_destroy(BdrvRequestPadding *pad) ++{ ++ if (pad->buf) { ++ qemu_vfree(pad->buf); ++ qemu_iovec_destroy(&pad->local_qiov); ++ } ++} ++ ++/* ++ * bdrv_pad_request ++ * ++ * Exchange request parameters with padded request if needed. Don't include RMW ++ * read of padding, bdrv_padding_rmw_read() should be called separately if ++ * needed. ++ * ++ * All parameters except @bs are in-out: they represent original request at ++ * function call and padded (if padding needed) at function finish. ++ * ++ * Function always succeeds. 
+ */ ++static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov, ++ int64_t *offset, unsigned int *bytes, ++ BdrvRequestPadding *pad) ++{ ++ if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { ++ return false; ++ } ++ ++ qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, ++ *qiov, 0, *bytes, ++ pad->buf + pad->buf_len - pad->tail, pad->tail); ++ *bytes += pad->head + pad->tail; ++ *offset -= pad->head; ++ *qiov = &pad->local_qiov; ++ ++ return true; ++} ++ + int coroutine_fn bdrv_co_preadv(BdrvChild *child, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) + { + BlockDriverState *bs = child->bs; +- BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; +- +- uint64_t align = bs->bl.request_alignment; +- uint8_t *head_buf = NULL; +- uint8_t *tail_buf = NULL; +- QEMUIOVector local_qiov; +- bool use_local_qiov = false; ++ BdrvRequestPadding pad; + int ret; + +- trace_bdrv_co_preadv(child->bs, offset, bytes, flags); +- +- if (!drv) { +- return -ENOMEDIUM; +- } ++ trace_bdrv_co_preadv(bs, offset, bytes, flags); + + ret = bdrv_check_byte_request(bs, offset, bytes); + if (ret < 0) { +@@ -1443,43 +1592,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, + flags |= BDRV_REQ_COPY_ON_READ; + } + +- /* Align read if necessary by padding qiov */ +- if (offset & (align - 1)) { +- head_buf = qemu_blockalign(bs, align); +- qemu_iovec_init(&local_qiov, qiov->niov + 2); +- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); +- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); +- use_local_qiov = true; +- +- bytes += offset & (align - 1); +- offset = offset & ~(align - 1); +- } +- +- if ((offset + bytes) & (align - 1)) { +- if (!use_local_qiov) { +- qemu_iovec_init(&local_qiov, qiov->niov + 1); +- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); +- use_local_qiov = true; +- } +- tail_buf = qemu_blockalign(bs, align); +- qemu_iovec_add(&local_qiov, tail_buf, +- align - ((offset + bytes) & (align - 1))); +- 
+- bytes = ROUND_UP(bytes, align); +- } ++ bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad); + + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); +- ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, +- use_local_qiov ? &local_qiov : qiov, +- flags); ++ ret = bdrv_aligned_preadv(child, &req, offset, bytes, ++ bs->bl.request_alignment, ++ qiov, flags); + tracked_request_end(&req); + bdrv_dec_in_flight(bs); + +- if (use_local_qiov) { +- qemu_iovec_destroy(&local_qiov); +- qemu_vfree(head_buf); +- qemu_vfree(tail_buf); +- } ++ bdrv_padding_destroy(&pad); + + return ret; + } +@@ -1775,44 +1897,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + BdrvTrackedRequest *req) + { + BlockDriverState *bs = child->bs; +- uint8_t *buf = NULL; + QEMUIOVector local_qiov; + uint64_t align = bs->bl.request_alignment; +- unsigned int head_padding_bytes, tail_padding_bytes; + int ret = 0; ++ bool padding; ++ BdrvRequestPadding pad; + +- head_padding_bytes = offset & (align - 1); +- tail_padding_bytes = (align - (offset + bytes)) & (align - 1); +- +- +- assert(flags & BDRV_REQ_ZERO_WRITE); +- if (head_padding_bytes || tail_padding_bytes) { +- buf = qemu_blockalign(bs, align); +- qemu_iovec_init_buf(&local_qiov, buf, align); +- } +- if (head_padding_bytes) { +- uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); +- +- /* RMW the unaligned part before head. 
*/ ++ padding = bdrv_init_padding(bs, offset, bytes, &pad); ++ if (padding) { + mark_request_serialising(req, align); + wait_serialising_requests(req); +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); +- ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, +- align, &local_qiov, 0); +- if (ret < 0) { +- goto fail; +- } +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + +- memset(buf + head_padding_bytes, 0, zero_bytes); +- ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, +- align, &local_qiov, +- flags & ~BDRV_REQ_ZERO_WRITE); +- if (ret < 0) { +- goto fail; ++ bdrv_padding_rmw_read(child, req, &pad, true); ++ ++ if (pad.head || pad.merge_reads) { ++ int64_t aligned_offset = offset & ~(align - 1); ++ int64_t write_bytes = pad.merge_reads ? pad.buf_len : align; ++ ++ qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes); ++ ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes, ++ align, &local_qiov, ++ flags & ~BDRV_REQ_ZERO_WRITE); ++ if (ret < 0 || pad.merge_reads) { ++ /* Error or all work is done */ ++ goto out; ++ } ++ offset += write_bytes - pad.head; ++ bytes -= write_bytes - pad.head; + } +- offset += zero_bytes; +- bytes -= zero_bytes; + } + + assert(!bytes || (offset & (align - 1)) == 0); +@@ -1822,7 +1934,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, + NULL, flags); + if (ret < 0) { +- goto fail; ++ goto out; + } + bytes -= aligned_bytes; + offset += aligned_bytes; +@@ -1830,26 +1942,17 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + + assert(!bytes || (offset & (align - 1)) == 0); + if (bytes) { +- assert(align == tail_padding_bytes + bytes); +- /* RMW the unaligned part after tail. 
*/ +- mark_request_serialising(req, align); +- wait_serialising_requests(req); +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); +- ret = bdrv_aligned_preadv(child, req, offset, align, +- align, &local_qiov, 0); +- if (ret < 0) { +- goto fail; +- } +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); ++ assert(align == pad.tail + bytes); + +- memset(buf, 0, bytes); ++ qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align); + ret = bdrv_aligned_pwritev(child, req, offset, align, align, + &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); + } +-fail: +- qemu_vfree(buf); +- return ret; + ++out: ++ bdrv_padding_destroy(&pad); ++ ++ return ret; + } + + /* +@@ -1862,10 +1965,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + BlockDriverState *bs = child->bs; + BdrvTrackedRequest req; + uint64_t align = bs->bl.request_alignment; +- uint8_t *head_buf = NULL; +- uint8_t *tail_buf = NULL; +- QEMUIOVector local_qiov; +- bool use_local_qiov = false; ++ BdrvRequestPadding pad; + int ret; + + trace_bdrv_co_pwritev(child->bs, offset, bytes, flags); +@@ -1892,86 +1992,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, + goto out; + } + +- if (offset & (align - 1)) { +- QEMUIOVector head_qiov; +- ++ if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) { + mark_request_serialising(&req, align); + wait_serialising_requests(&req); +- +- head_buf = qemu_blockalign(bs, align); +- qemu_iovec_init_buf(&head_qiov, head_buf, align); +- +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); +- ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, +- align, &head_qiov, 0); +- if (ret < 0) { +- goto fail; +- } +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); +- +- qemu_iovec_init(&local_qiov, qiov->niov + 2); +- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); +- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); +- use_local_qiov = true; +- +- bytes += offset & (align - 1); +- offset = offset & ~(align - 1); +- +- /* We have read the tail already if 
the request is smaller +- * than one aligned block. +- */ +- if (bytes < align) { +- qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes); +- bytes = align; +- } +- } +- +- if ((offset + bytes) & (align - 1)) { +- QEMUIOVector tail_qiov; +- size_t tail_bytes; +- bool waited; +- +- mark_request_serialising(&req, align); +- waited = wait_serialising_requests(&req); +- assert(!waited || !use_local_qiov); +- +- tail_buf = qemu_blockalign(bs, align); +- qemu_iovec_init_buf(&tail_qiov, tail_buf, align); +- +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); +- ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), +- align, align, &tail_qiov, 0); +- if (ret < 0) { +- goto fail; +- } +- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); +- +- if (!use_local_qiov) { +- qemu_iovec_init(&local_qiov, qiov->niov + 1); +- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); +- use_local_qiov = true; +- } +- +- tail_bytes = (offset + bytes) & (align - 1); +- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); +- +- bytes = ROUND_UP(bytes, align); ++ bdrv_padding_rmw_read(child, &req, &pad, false); + } + + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, +- use_local_qiov ? 
&local_qiov : qiov, +- flags); ++ qiov, flags); + +-fail: ++ bdrv_padding_destroy(&pad); + +- if (use_local_qiov) { +- qemu_iovec_destroy(&local_qiov); +- } +- qemu_vfree(head_buf); +- qemu_vfree(tail_buf); + out: + tracked_request_end(&req); + bdrv_dec_in_flight(bs); ++ + return ret; + } + +-- +2.23.0 diff --git a/block-mirror-fix-use-after-free-of-local_err.patch b/block-mirror-fix-use-after-free-of-local_err.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea2f739410164f7df43f020192cd60653a3b8cf0 --- /dev/null +++ b/block-mirror-fix-use-after-free-of-local_err.patch @@ -0,0 +1,34 @@ +From 682d23829adf0a872d5a3ca6eb4b31c424f558fc Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 24 Mar 2020 18:36:26 +0300 +Subject: [PATCH 09/14] block/mirror: fix use after free of local_err + +local_err is used again in mirror_exit_common() after +bdrv_set_backing_hd(), so we must zero it. Otherwise try to set +non-NULL local_err will crash. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200324153630.11882-3-vsementsov@virtuozzo.com> +Reviewed-by: Eric Blake +Reviewed-by: John Snow +Signed-off-by: Max Reitz +Signed-off-by: Peng Liang +--- + block/mirror.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/mirror.c b/block/mirror.c +index 681b305de650..ef6c958ff9b3 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -674,6 +674,7 @@ static int mirror_exit_common(Job *job) + bdrv_set_backing_hd(target_bs, backing, &local_err); + if (local_err) { + error_report_err(local_err); ++ local_err = NULL; + ret = -EPERM; + } + } +-- +2.26.2 + diff --git a/block-nbd-extract-the-common-cleanup-code.patch b/block-nbd-extract-the-common-cleanup-code.patch new file mode 100644 index 0000000000000000000000000000000000000000..4cc24818b7c96cc8c85a0c8ac97dc2452c234610 --- /dev/null +++ b/block-nbd-extract-the-common-cleanup-code.patch @@ -0,0 +1,67 @@ +From 1196a2079a558cbb673e06142fa67a401c5e6c30 Mon Sep 17 00:00:00 2001 
+From: Pan Nengyuan +Date: Thu, 5 Dec 2019 11:45:27 +0800 +Subject: [PATCH 6/9] block/nbd: extract the common cleanup code + +The BDRVNBDState cleanup code is common in two places, add +nbd_clear_bdrvstate() function to do these cleanups. + +Suggested-by: Stefano Garzarella +Signed-off-by: Pan Nengyuan +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-Id: <1575517528-44312-2-git-send-email-pannengyuan@huawei.com> +Reviewed-by: Eric Blake +[eblake: fix compilation error and commit message] +Signed-off-by: Eric Blake +Signed-off-by: AlexChen +--- + block/nbd.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/block/nbd.c b/block/nbd.c +index 57c1a20..3977b1e 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -73,6 +73,16 @@ typedef struct BDRVNBDState { + char *export, *tlscredsid; + } BDRVNBDState; + ++static void nbd_clear_bdrvstate(BDRVNBDState *s) ++{ ++ qapi_free_SocketAddress(s->saddr); ++ s->saddr = NULL; ++ g_free(s->export); ++ s->export = NULL; ++ g_free(s->tlscredsid); ++ s->tlscredsid = NULL; ++} ++ + static void nbd_recv_coroutines_wake_all(BDRVNBDState *s) + { + int i; +@@ -1640,9 +1650,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, + object_unref(OBJECT(tlscreds)); + } + if (ret < 0) { +- qapi_free_SocketAddress(s->saddr); +- g_free(s->export); +- g_free(s->tlscredsid); ++ nbd_clear_bdrvstate(s); + } + qemu_opts_del(opts); + return ret; +@@ -1692,10 +1700,7 @@ static void nbd_close(BlockDriverState *bs) + BDRVNBDState *s = bs->opaque; + + nbd_client_close(bs); +- +- qapi_free_SocketAddress(s->saddr); +- g_free(s->export); +- g_free(s->tlscredsid); ++ nbd_clear_bdrvstate(s); + } + + static int64_t nbd_getlength(BlockDriverState *bs) +-- +1.8.3.1 + diff --git a/block-nfs-tear-down-aio-before-nfs_close.patch b/block-nfs-tear-down-aio-before-nfs_close.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea116d0a381c18521da88b94d3ea914f0357939d --- /dev/null +++ 
b/block-nfs-tear-down-aio-before-nfs_close.patch @@ -0,0 +1,41 @@ +From 0694c489cd240620fee5675e8d24c7ce02d1d67d Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Tue, 10 Sep 2019 17:41:09 +0200 +Subject: [PATCH] block/nfs: tear down aio before nfs_close + +nfs_close is a sync call from libnfs and has its own event +handler polling on the nfs FD. Avoid that both QEMU and libnfs +are intefering here. + +CC: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Signed-off-by: Kevin Wolf +(cherry picked from commit 601dc6559725f7a614b6f893611e17ff0908e914) +Signed-off-by: Michael Roth +--- + block/nfs.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/block/nfs.c b/block/nfs.c +index d93241b3bb..2b7a078241 100644 +--- a/block/nfs.c ++++ b/block/nfs.c +@@ -390,12 +390,14 @@ static void nfs_attach_aio_context(BlockDriverState *bs, + static void nfs_client_close(NFSClient *client) + { + if (client->context) { ++ qemu_mutex_lock(&client->mutex); ++ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), ++ false, NULL, NULL, NULL, NULL); ++ qemu_mutex_unlock(&client->mutex); + if (client->fh) { + nfs_close(client->context, client->fh); + client->fh = NULL; + } +- aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), +- false, NULL, NULL, NULL, NULL); + nfs_destroy_context(client->context); + client->context = NULL; + } +-- +2.23.0 diff --git a/block-posix-Always-allocate-the-first-block.patch b/block-posix-Always-allocate-the-first-block.patch new file mode 100644 index 0000000000000000000000000000000000000000..166d73957ce0569929276d446a1934be17cad612 --- /dev/null +++ b/block-posix-Always-allocate-the-first-block.patch @@ -0,0 +1,343 @@ +From 3d018ff3bdd8aec260254036b600cfa8d694ced4 Mon Sep 17 00:00:00 2001 +From: Nir Soffer +Date: Tue, 27 Aug 2019 04:05:27 +0300 +Subject: [PATCH] block: posix: Always allocate the first block + +When creating an image with preallocation "off" or "falloc", the first +block of the 
image is typically not allocated. When using Gluster +storage backed by XFS filesystem, reading this block using direct I/O +succeeds regardless of request length, fooling alignment detection. + +In this case we fallback to a safe value (4096) instead of the optimal +value (512), which may lead to unneeded data copying when aligning +requests. Allocating the first block avoids the fallback. + +Since we allocate the first block even with preallocation=off, we no +longer create images with zero disk size: + + $ ./qemu-img create -f raw test.raw 1g + Formatting 'test.raw', fmt=raw size=1073741824 + + $ ls -lhs test.raw + 4.0K -rw-r--r--. 1 nsoffer nsoffer 1.0G Aug 16 23:48 test.raw + +And converting the image requires additional cluster: + + $ ./qemu-img measure -f raw -O qcow2 test.raw + required size: 458752 + fully allocated size: 1074135040 + +When using format like vmdk with multiple files per image, we allocate +one block per file: + + $ ./qemu-img create -f vmdk -o subformat=twoGbMaxExtentFlat test.vmdk 4g + Formatting 'test.vmdk', fmt=vmdk size=4294967296 compat6=off hwversion=undefined subformat=twoGbMaxExtentFlat + + $ ls -lhs test*.vmdk + 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f001.vmdk + 4.0K -rw-r--r--. 1 nsoffer nsoffer 2.0G Aug 27 03:23 test-f002.vmdk + 4.0K -rw-r--r--. 1 nsoffer nsoffer 353 Aug 27 03:23 test.vmdk + +I did quick performance test for copying disks with qemu-img convert to +new raw target image to Gluster storage with sector size of 512 bytes: + + for i in $(seq 10); do + rm -f dst.raw + sleep 10 + time ./qemu-img convert -f raw -O raw -t none -T none src.raw dst.raw + done + +Here is a table comparing the total time spent: + +Type Before(s) After(s) Diff(%) +--------------------------------------- +real 530.028 469.123 -11.4 +user 17.204 10.768 -37.4 +sys 17.881 7.011 -60.7 + +We can see very clear improvement in CPU usage. 
+ +Signed-off-by: Nir Soffer +Message-id: 20190827010528.8818-2-nsoffer@redhat.com +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz + +(cherry picked from commit 3a20013fbb26d2a1bd11ef148eefdb1508783787) + +Signed-off-by: Michael Roth +--- + block/file-posix.c | 51 +++++++++++++++++++ + tests/qemu-iotests/059.out | 2 +- + tests/qemu-iotests/{150.out => 150.out.qcow2} | 0 + tests/qemu-iotests/150.out.raw | 12 +++++ + tests/qemu-iotests/175 | 19 ++++--- + tests/qemu-iotests/175.out | 8 +-- + tests/qemu-iotests/178.out.qcow2 | 4 +- + tests/qemu-iotests/221.out | 12 +++-- + tests/qemu-iotests/253.out | 12 +++-- + 9 files changed, 99 insertions(+), 21 deletions(-) + rename tests/qemu-iotests/{150.out => 150.out.qcow2} (100%) + create mode 100644 tests/qemu-iotests/150.out.raw + +diff --git a/block/file-posix.c b/block/file-posix.c +index be32dd8c51..2184aa980c 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1674,6 +1674,43 @@ static int handle_aiocb_discard(void *opaque) + return ret; + } + ++/* ++ * Help alignment probing by allocating the first block. ++ * ++ * When reading with direct I/O from unallocated area on Gluster backed by XFS, ++ * reading succeeds regardless of request length. In this case we fallback to ++ * safe alignment which is not optimal. Allocating the first block avoids this ++ * fallback. ++ * ++ * fd may be opened with O_DIRECT, but we don't know the buffer alignment or ++ * request alignment, so we use safe values. ++ * ++ * Returns: 0 on success, -errno on failure. Since this is an optimization, ++ * caller may ignore failures. ++ */ ++static int allocate_first_block(int fd, size_t max_size) ++{ ++ size_t write_size = (max_size < MAX_BLOCKSIZE) ++ ? 
BDRV_SECTOR_SIZE ++ : MAX_BLOCKSIZE; ++ size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); ++ void *buf; ++ ssize_t n; ++ int ret; ++ ++ buf = qemu_memalign(max_align, write_size); ++ memset(buf, 0, write_size); ++ ++ do { ++ n = pwrite(fd, buf, write_size, 0); ++ } while (n == -1 && errno == EINTR); ++ ++ ret = (n == -1) ? -errno : 0; ++ ++ qemu_vfree(buf); ++ return ret; ++} ++ + static int handle_aiocb_truncate(void *opaque) + { + RawPosixAIOData *aiocb = opaque; +@@ -1713,6 +1750,17 @@ static int handle_aiocb_truncate(void *opaque) + /* posix_fallocate() doesn't set errno. */ + error_setg_errno(errp, -result, + "Could not preallocate new data"); ++ } else if (current_length == 0) { ++ /* ++ * posix_fallocate() uses fallocate() if the filesystem ++ * supports it, or fallback to manually writing zeroes. If ++ * fallocate() was used, unaligned reads from the fallocated ++ * area in raw_probe_alignment() will succeed, hence we need to ++ * allocate the first block. ++ * ++ * Optimize future alignment probing; ignore failures. ++ */ ++ allocate_first_block(fd, offset); + } + } else { + result = 0; +@@ -1774,6 +1822,9 @@ static int handle_aiocb_truncate(void *opaque) + if (ftruncate(fd, offset) != 0) { + result = -errno; + error_setg_errno(errp, -result, "Could not resize file"); ++ } else if (current_length == 0 && offset > current_length) { ++ /* Optimize future alignment probing; ignore failures. 
*/ ++ allocate_first_block(fd, offset); + } + return result; + default: +diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out +index 4fab42a28c..fe3f861f3c 100644 +--- a/tests/qemu-iotests/059.out ++++ b/tests/qemu-iotests/059.out +@@ -27,7 +27,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824000 subformat=twoGbMax + image: TEST_DIR/t.vmdk + file format: vmdk + virtual size: 0.977 TiB (1073741824000 bytes) +-disk size: 16 KiB ++disk size: 1.97 MiB + Format specific information: + cid: XXXXXXXX + parent cid: XXXXXXXX +diff --git a/tests/qemu-iotests/150.out b/tests/qemu-iotests/150.out.qcow2 +similarity index 100% +rename from tests/qemu-iotests/150.out +rename to tests/qemu-iotests/150.out.qcow2 +diff --git a/tests/qemu-iotests/150.out.raw b/tests/qemu-iotests/150.out.raw +new file mode 100644 +index 0000000000..3cdc7727a5 +--- /dev/null ++++ b/tests/qemu-iotests/150.out.raw +@@ -0,0 +1,12 @@ ++QA output created by 150 ++ ++=== Mapping sparse conversion === ++ ++Offset Length File ++0 0x1000 TEST_DIR/t.IMGFMT ++ ++=== Mapping non-sparse conversion === ++ ++Offset Length File ++0 0x100000 TEST_DIR/t.IMGFMT ++*** done +diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 +index 51e62c8276..7ba28b3c1b 100755 +--- a/tests/qemu-iotests/175 ++++ b/tests/qemu-iotests/175 +@@ -37,14 +37,16 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 + # the file size. This function hides the resulting difference in the + # stat -c '%b' output. 
+ # Parameter 1: Number of blocks an empty file occupies +-# Parameter 2: Image size in bytes ++# Parameter 2: Minimal number of blocks in an image ++# Parameter 3: Image size in bytes + _filter_blocks() + { + extra_blocks=$1 +- img_size=$2 ++ min_blocks=$2 ++ img_size=$3 + +- sed -e "s/blocks=$extra_blocks\\(\$\\|[^0-9]\\)/nothing allocated/" \ +- -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/everything allocated/" ++ sed -e "s/blocks=$min_blocks\\(\$\\|[^0-9]\\)/min allocation/" \ ++ -e "s/blocks=$((extra_blocks + img_size / 512))\\(\$\\|[^0-9]\\)/max allocation/" + } + + # get standard environment, filters and checks +@@ -60,16 +62,21 @@ size=$((1 * 1024 * 1024)) + touch "$TEST_DIR/empty" + extra_blocks=$(stat -c '%b' "$TEST_DIR/empty") + ++# We always write the first byte; check how many blocks this filesystem ++# allocates to match empty image alloation. ++printf "\0" > "$TEST_DIR/empty" ++min_blocks=$(stat -c '%b' "$TEST_DIR/empty") ++ + echo + echo "== creating image with default preallocation ==" + _make_test_img $size | _filter_imgfmt +-stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size ++stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size + + for mode in off full falloc; do + echo + echo "== creating image with preallocation $mode ==" + IMGOPTS=preallocation=$mode _make_test_img $size | _filter_imgfmt +- stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $size ++ stat -c "size=%s, blocks=%b" $TEST_IMG | _filter_blocks $extra_blocks $min_blocks $size + done + + # success, all done +diff --git a/tests/qemu-iotests/175.out b/tests/qemu-iotests/175.out +index 6d9a5ed84e..263e521262 100644 +--- a/tests/qemu-iotests/175.out ++++ b/tests/qemu-iotests/175.out +@@ -2,17 +2,17 @@ QA output created by 175 + + == creating image with default preallocation == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +-size=1048576, nothing allocated ++size=1048576, min 
allocation + + == creating image with preallocation off == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=off +-size=1048576, nothing allocated ++size=1048576, min allocation + + == creating image with preallocation full == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=full +-size=1048576, everything allocated ++size=1048576, max allocation + + == creating image with preallocation falloc == + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 preallocation=falloc +-size=1048576, everything allocated ++size=1048576, max allocation + *** done +diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2 +index 55a8dc926f..9e7d8c44df 100644 +--- a/tests/qemu-iotests/178.out.qcow2 ++++ b/tests/qemu-iotests/178.out.qcow2 +@@ -101,7 +101,7 @@ converted image file size in bytes: 196608 + == raw input image with data (human) == + + Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 +-required size: 393216 ++required size: 458752 + fully allocated size: 1074135040 + wrote 512/512 bytes at offset 512 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +@@ -257,7 +257,7 @@ converted image file size in bytes: 196608 + + Formatting 'TEST_DIR/t.qcow2', fmt=IMGFMT size=1073741824 + { +- "required": 393216, ++ "required": 458752, + "fully-allocated": 1074135040 + } + wrote 512/512 bytes at offset 512 +diff --git a/tests/qemu-iotests/221.out b/tests/qemu-iotests/221.out +index 9f9dd52bb0..dca024a0c3 100644 +--- a/tests/qemu-iotests/221.out ++++ b/tests/qemu-iotests/221.out +@@ -3,14 +3,18 @@ QA output created by 221 + === Check mapping of unaligned raw image === + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=65537 +-[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 66048, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": 
OFFSET}, ++{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61952, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + wrote 1/1 bytes at offset 65536 + 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, + { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 65536, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 61440, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 65536, "length": 1, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, + { "start": 65537, "length": 511, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + *** done +diff --git a/tests/qemu-iotests/253.out b/tests/qemu-iotests/253.out +index 607c0baa0b..3d08b305d7 100644 +--- a/tests/qemu-iotests/253.out ++++ b/tests/qemu-iotests/253.out +@@ -3,12 +3,16 @@ QA output created by 253 + === Check mapping of unaligned raw image === + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048575 +-[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] +-[{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 1044480, "depth": 
0, "zero": true, "data": false, "offset": OFFSET}] ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 1044480, "depth": 0, "zero": true, "data": false, "offset": OFFSET}] + wrote 65535/65535 bytes at offset 983040 + 63.999 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] +-[{ "start": 0, "length": 983040, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, ++[{ "start": 0, "length": 4096, "depth": 0, "zero": false, "data": true, "offset": OFFSET}, ++{ "start": 4096, "length": 978944, "depth": 0, "zero": true, "data": false, "offset": OFFSET}, + { "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true, "offset": OFFSET}] + *** done +-- +2.23.0 diff --git a/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch b/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch new file mode 100644 index 0000000000000000000000000000000000000000..f77cc06c60dd36ccd84a5ad5c5e9748bb2126c08 --- /dev/null +++ b/block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch @@ -0,0 +1,66 @@ +From 84f22c728520792f1010074e0d5ac2ec8e2e372c Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Sun, 15 Sep 2019 23:36:53 +0300 +Subject: [PATCH] block/qcow2: Fix corruption introduced by commit 8ac0f15f335 + +This fixes subtle corruption introduced by luks threaded encryption +in commit 8ac0f15f335 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1745922 + +The corruption happens when we do a write that + * writes to two or more unallocated clusters at once + * doesn't fully cover the first sector + * 
doesn't fully cover the last sector + * uses luks encryption + +In this case, when allocating the new clusters we COW both areas +prior to the write and after the write, and we encrypt them. + +The above mentioned commit accidentally made it so we encrypt the +second COW area using the physical cluster offset of the first area. + +The problem is that offset_in_cluster in do_perform_cow_encrypt +can be larger than the cluster size, thus cluster_offset +will no longer point to the start of the cluster at which the encrypted +area starts. + +Next patch in this series will refactor the code to avoid all these +assumptions. + +In the bug report this was triggered by rebasing a luks image to a new, +zero filled base, which does a lot of such writes and causes some files +with zero areas to contain garbage there instead. +But as described above it can happen elsewhere as well. + +Signed-off-by: Maxim Levitsky +Reviewed-by: Vladimir Sementsov-Ogievskiy +Message-id: 20190915203655.21638-2-mlevitsk@redhat.com +Reviewed-by: Max Reitz +Signed-off-by: Max Reitz +(cherry picked from commit 38e7d54bdc518b5a05a922467304bcace2396945) +Signed-off-by: Michael Roth +--- + block/qcow2-cluster.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index cc5609e27a..760564c8fb 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -473,9 +473,10 @@ static bool coroutine_fn do_perform_cow_encrypt(BlockDriverState *bs, + assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); + assert((bytes & ~BDRV_SECTOR_MASK) == 0); + assert(s->crypto); +- if (qcow2_co_encrypt(bs, cluster_offset, +- src_cluster_offset + offset_in_cluster, +- buffer, bytes) < 0) { ++ if (qcow2_co_encrypt(bs, ++ start_of_cluster(s, cluster_offset + offset_in_cluster), ++ src_cluster_offset + offset_in_cluster, ++ buffer, bytes) < 0) { + return false; + } + } +-- +2.23.0 diff --git a/block-qcow2-do-free-crypto_opts-in-qcow2_close.patch
b/block-qcow2-do-free-crypto_opts-in-qcow2_close.patch new file mode 100644 index 0000000000000000000000000000000000000000..44b0ea19e95b95bbd583034e9c830e3dd6d647e6 --- /dev/null +++ b/block-qcow2-do-free-crypto_opts-in-qcow2_close.patch @@ -0,0 +1,54 @@ +From 88ef4e1862987227f8b87228cff94be3af66d054 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Thu, 27 Feb 2020 09:29:49 +0800 +Subject: [PATCH 01/14] block/qcow2: do free crypto_opts in qcow2_close() + +'crypto_opts' forgot to free in qcow2_close(), this patch fix the bellow leak stack: + +Direct leak of 24 byte(s) in 1 object(s) allocated from: + #0 0x7f0edd81f970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) + #1 0x7f0edc6d149d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) + #2 0x55d7eaede63d in qobject_input_start_struct /mnt/sdb/qemu-new/qemu_test/qemu/qapi/qobject-input-visitor.c:295 + #3 0x55d7eaed78b8 in visit_start_struct /mnt/sdb/qemu-new/qemu_test/qemu/qapi/qapi-visit-core.c:49 + #4 0x55d7eaf5140b in visit_type_QCryptoBlockOpenOptions qapi/qapi-visit-crypto.c:290 + #5 0x55d7eae43af3 in block_crypto_open_opts_init /mnt/sdb/qemu-new/qemu_test/qemu/block/crypto.c:163 + #6 0x55d7eacd2924 in qcow2_update_options_prepare /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1148 + #7 0x55d7eacd33f7 in qcow2_update_options /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1232 + #8 0x55d7eacd9680 in qcow2_do_open /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1512 + #9 0x55d7eacdc55e in qcow2_open_entry /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1792 + #10 0x55d7eacdc8fe in qcow2_open /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1819 + #11 0x55d7eac3742d in bdrv_open_driver /mnt/sdb/qemu-new/qemu_test/qemu/block.c:1317 + #12 0x55d7eac3e990 in bdrv_open_common /mnt/sdb/qemu-new/qemu_test/qemu/block.c:1575 + #13 0x55d7eac4442c in bdrv_open_inherit /mnt/sdb/qemu-new/qemu_test/qemu/block.c:3126 + #14 0x55d7eac45c3f in bdrv_open /mnt/sdb/qemu-new/qemu_test/qemu/block.c:3219 + #15 0x55d7ead8e8a4 in 
blk_new_open /mnt/sdb/qemu-new/qemu_test/qemu/block/block-backend.c:397 + #16 0x55d7eacde74c in qcow2_co_create /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:3534 + #17 0x55d7eacdfa6d in qcow2_co_create_opts /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:3668 + #18 0x55d7eac1c678 in bdrv_create_co_entry /mnt/sdb/qemu-new/qemu_test/qemu/block.c:485 + #19 0x55d7eb0024d2 in coroutine_trampoline /mnt/sdb/qemu-new/qemu_test/qemu/util/coroutine-ucontext.c:115 + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Reviewed-by: Max Reitz +Message-Id: <20200227012950.12256-2-pannengyuan@huawei.com> +Signed-off-by: Max Reitz +Signed-off-by: Peng Liang +--- + block/qcow2.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 1909df6e1d24..27c54b9905aa 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2408,6 +2408,7 @@ static void qcow2_close(BlockDriverState *bs) + + qcrypto_block_free(s->crypto); + s->crypto = NULL; ++ qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); + + g_free(s->unknown_header_fields); + cleanup_unknown_header_ext(bs); +-- +2.26.2 + diff --git a/block-qcow2-threads-fix-qcow2_decompress.patch b/block-qcow2-threads-fix-qcow2_decompress.patch new file mode 100644 index 0000000000000000000000000000000000000000..d2fd9ee74fc5b57fb57ee1f655763895f1fe4356 --- /dev/null +++ b/block-qcow2-threads-fix-qcow2_decompress.patch @@ -0,0 +1,75 @@ +From a583b6b616b086d3fdce93e255d24ab2c865efd3 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Mon, 2 Mar 2020 18:09:30 +0300 +Subject: [PATCH 03/14] block/qcow2-threads: fix qcow2_decompress +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +On success path we return what inflate() returns instead of 0. And it +most probably works for Z_STREAM_END as it is positive, but is +definitely broken for Z_BUF_ERROR. + +While being here, switch to errno return code, to be closer to +qcow2_compress API (and usual expectations). 
+ +Revert condition in if to be more positive. Drop dead initialization of +ret. + +Cc: qemu-stable@nongnu.org # v4.0 +Fixes: 341926ab83e2b +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200302150930.16218-1-vsementsov@virtuozzo.com> +Reviewed-by: Alberto Garcia +Reviewed-by: Ján Tomko +Signed-off-by: Max Reitz +Signed-off-by: Peng Liang +--- + block/qcow2-threads.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c +index 3b1e63fe414d..449cd3c0a1f4 100644 +--- a/block/qcow2-threads.c ++++ b/block/qcow2-threads.c +@@ -128,12 +128,12 @@ static ssize_t qcow2_compress(void *dest, size_t dest_size, + * @src - source buffer, @src_size bytes + * + * Returns: 0 on success +- * -1 on fail ++ * -EIO on fail + */ + static ssize_t qcow2_decompress(void *dest, size_t dest_size, + const void *src, size_t src_size) + { +- int ret = 0; ++ int ret; + z_stream strm; + + memset(&strm, 0, sizeof(strm)); +@@ -144,17 +144,19 @@ static ssize_t qcow2_decompress(void *dest, size_t dest_size, + + ret = inflateInit2(&strm, -12); + if (ret != Z_OK) { +- return -1; ++ return -EIO; + } + + ret = inflate(&strm, Z_FINISH); +- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) { ++ if ((ret == Z_STREAM_END || ret == Z_BUF_ERROR) && strm.avail_out == 0) { + /* + * We approve Z_BUF_ERROR because we need @dest buffer to be filled, but + * @src buffer may be processed partly (because in qcow2 we know size of + * compressed data with precision of one sector) + */ +- ret = -1; ++ ret = 0; ++ } else { ++ ret = -EIO; + } + + inflateEnd(&strm); +-- +2.26.2 + diff --git a/block-snapshot-Restrict-set-of-snapshot-nodes.patch b/block-snapshot-Restrict-set-of-snapshot-nodes.patch new file mode 100644 index 0000000000000000000000000000000000000000..c29f30adc897f5b60bf8004b7f317b6e6257bf3a --- /dev/null +++ b/block-snapshot-Restrict-set-of-snapshot-nodes.patch @@ -0,0 +1,124 @@ +From 
7a8aa6c734bb1c2927ad0cc1d10bcacb53cf4ae3 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 17 Sep 2019 12:26:23 +0200 +Subject: [PATCH] block/snapshot: Restrict set of snapshot nodes + +Nodes involved in internal snapshots were those that were returned by +bdrv_next(), inserted and not read-only. bdrv_next() in turn returns all +nodes that are either the root node of a BlockBackend or monitor-owned +nodes. + +With the typical -drive use, this worked well enough. However, in the +typical -blockdev case, the user defines one node per option, making all +nodes monitor-owned nodes. This includes protocol nodes etc. which often +are not snapshottable, so "savevm" only returns an error. + +Change the conditions so that internal snapshot still include all nodes +that have a BlockBackend attached (we definitely want to snapshot +anything attached to a guest device and probably also the built-in NBD +server; snapshotting block job BlockBackends is more of an accident, but +a preexisting one), but other monitor-owned nodes are only included if +they have no parents. + +This makes internal snapshots usable again with typical -blockdev +configurations. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Peter Krempa +Tested-by: Peter Krempa +(cherry picked from commit 05f4aced658a02b02d3e89a6c7a2281008fcf26c) +Signed-off-by: Michael Roth +--- + block/snapshot.c | 26 +++++++++++++++++++------- + 1 file changed, 19 insertions(+), 7 deletions(-) + +diff --git a/block/snapshot.c b/block/snapshot.c +index f2f48f926a..8081616ae9 100644 +--- a/block/snapshot.c ++++ b/block/snapshot.c +@@ -31,6 +31,7 @@ + #include "qapi/qmp/qerror.h" + #include "qapi/qmp/qstring.h" + #include "qemu/option.h" ++#include "sysemu/block-backend.h" + + QemuOptsList internal_snapshot_opts = { + .name = "snapshot", +@@ -384,6 +385,16 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, + return ret; + } + ++static bool bdrv_all_snapshots_includes_bs(BlockDriverState *bs) ++{ ++ if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { ++ return false; ++ } ++ ++ /* Include all nodes that are either in use by a BlockBackend, or that ++ * aren't attached to any node, but owned by the monitor. */ ++ return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents); ++} + + /* Group operations. All block drivers are involved. 
+ * These functions will properly handle dataplane (take aio_context_acquire +@@ -399,7 +410,7 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); +- if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) { ++ if (bdrv_all_snapshots_includes_bs(bs)) { + ok = bdrv_can_snapshot(bs); + } + aio_context_release(ctx); +@@ -426,8 +437,9 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); +- if (bdrv_can_snapshot(bs) && +- bdrv_snapshot_find(bs, snapshot, name) >= 0) { ++ if (bdrv_all_snapshots_includes_bs(bs) && ++ bdrv_snapshot_find(bs, snapshot, name) >= 0) ++ { + ret = bdrv_snapshot_delete(bs, snapshot->id_str, + snapshot->name, err); + } +@@ -455,7 +467,7 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs, + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); +- if (bdrv_can_snapshot(bs)) { ++ if (bdrv_all_snapshots_includes_bs(bs)) { + ret = bdrv_snapshot_goto(bs, name, errp); + } + aio_context_release(ctx); +@@ -481,7 +493,7 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); +- if (bdrv_can_snapshot(bs)) { ++ if (bdrv_all_snapshots_includes_bs(bs)) { + err = bdrv_snapshot_find(bs, &sn, name); + } + aio_context_release(ctx); +@@ -512,7 +524,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + if (bs == vm_state_bs) { + sn->vm_state_size = vm_state_size; + err = bdrv_snapshot_create(bs, sn); +- } else if (bdrv_can_snapshot(bs)) { ++ } else if (bdrv_all_snapshots_includes_bs(bs)) { + sn->vm_state_size = 0; + err = bdrv_snapshot_create(bs, sn); + } +@@ -538,7 +550,7 @@ BlockDriverState *bdrv_all_find_vmstate_bs(void) + bool found; + + aio_context_acquire(ctx); +- found = bdrv_can_snapshot(bs); ++ found = 
bdrv_all_snapshots_includes_bs(bs) && bdrv_can_snapshot(bs); + aio_context_release(ctx); + + if (found) { +-- +2.23.0 diff --git a/block-vvfat-Fix-bad-printf-format-specifiers.patch b/block-vvfat-Fix-bad-printf-format-specifiers.patch new file mode 100644 index 0000000000000000000000000000000000000000..597b9782683778c799e3192fd66d27eb230eee8e --- /dev/null +++ b/block-vvfat-Fix-bad-printf-format-specifiers.patch @@ -0,0 +1,77 @@ +From c9a4e85610bffe1803648c431e4cff4539a42323 Mon Sep 17 00:00:00 2001 +From: AlexChen +Date: Tue, 3 Nov 2020 17:42:56 +0800 +Subject: [PATCH] block/vvfat: Fix bad printf format specifiers + +We should use printf format specifier "%u" instead of "%d" for +argument of type "unsigned int". +In addition, fix two error format problems found by checkpatch.pl: +ERROR: space required after that ',' (ctx:VxV) ++ fprintf(stderr,"%s attributes=0x%02x begin=%u size=%d\n", + ^ +ERROR: line over 90 characters ++ fprintf(stderr, "%d, %s (%u, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action); + +Reported-by: Euler Robot +Signed-off-by: Alex Chen +Message-Id: <5FA12620.6030705@huawei.com> +Signed-off-by: Kevin Wolf +(cherry-picked from commit c9eb2f3e38) +--- + block/vvfat.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/block/vvfat.c b/block/vvfat.c +index f6c28805dd..5dc8d6eb4c 100644 +--- a/block/vvfat.c ++++ b/block/vvfat.c +@@ -1453,7 +1453,7 @@ static void print_direntry(const direntry_t* direntry) + for(i=0;i<11;i++) + ADD_CHAR(direntry->name[i]); + buffer[j] = 0; +- fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n", ++ fprintf(stderr, "%s attributes=0x%02x begin=%u size=%u\n", + buffer, + direntry->attributes, + begin_of_direntry(direntry),le32_to_cpu(direntry->size)); +@@ -1462,7 +1462,7 @@ static void print_direntry(const direntry_t* direntry) + + static void print_mapping(const mapping_t* mapping) + { +- fprintf(stderr, "mapping (%p): begin, end = %d, %d, 
dir_index = %d, " ++ fprintf(stderr, "mapping (%p): begin, end = %u, %u, dir_index = %u, " + "first_mapping_index = %d, name = %s, mode = 0x%x, " , + mapping, mapping->begin, mapping->end, mapping->dir_index, + mapping->first_mapping_index, mapping->path, mapping->mode); +@@ -1470,7 +1470,7 @@ static void print_mapping(const mapping_t* mapping) + if (mapping->mode & MODE_DIRECTORY) + fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n", mapping->info.dir.parent_mapping_index, mapping->info.dir.first_dir_index); + else +- fprintf(stderr, "offset = %d\n", mapping->info.file.offset); ++ fprintf(stderr, "offset = %u\n", mapping->info.file.offset); + } + #endif + +@@ -1604,7 +1604,7 @@ typedef struct commit_t { + static void clear_commits(BDRVVVFATState* s) + { + int i; +-DLOG(fprintf(stderr, "clear_commits (%d commits)\n", s->commits.next)); ++DLOG(fprintf(stderr, "clear_commits (%u commits)\n", s->commits.next)); + for (i = 0; i < s->commits.next; i++) { + commit_t* commit = array_get(&(s->commits), i); + assert(commit->path || commit->action == ACTION_WRITEOUT); +@@ -2660,7 +2660,9 @@ static int handle_renames_and_mkdirs(BDRVVVFATState* s) + fprintf(stderr, "handle_renames\n"); + for (i = 0; i < s->commits.next; i++) { + commit_t* commit = array_get(&(s->commits), i); +- fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action); ++ fprintf(stderr, "%d, %s (%u, %d)\n", i, ++ commit->path ? 
commit->path : "(null)", ++ commit->param.rename.cluster, commit->action); + } + #endif + +-- +2.27.0 + diff --git a/blockdev-Return-bs-to-the-proper-context-on-snapshot.patch b/blockdev-Return-bs-to-the-proper-context-on-snapshot.patch new file mode 100644 index 0000000000000000000000000000000000000000..a232c7450689ae13c12e215803a7d1a2bca4c158 --- /dev/null +++ b/blockdev-Return-bs-to-the-proper-context-on-snapshot.patch @@ -0,0 +1,93 @@ +From dc6b61f12750b3ab5a3965af2ec758750389233d Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Wed, 8 Jan 2020 15:31:37 +0100 +Subject: [PATCH] blockdev: Return bs to the proper context on snapshot abort + +external_snapshot_abort() calls bdrv_set_backing_hd(), which +returns state->old_bs to the main AioContext, as it's intended to be +used when the BDS is going to be released. As that's not the case when +aborting an external snapshot, return it to the AioContext it was in +before the call. + +This issue can be triggered by issuing a transaction with two actions, +a proper blockdev-snapshot-sync and a bogus one, so the second will +trigger a transaction abort.
This results in a crash with an stack +trace like this one: + + #0 0x00007fa1048b28df in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50 + #1 0x00007fa10489ccf5 in __GI_abort () at abort.c:79 + #2 0x00007fa10489cbc9 in __assert_fail_base + (fmt=0x7fa104a03300 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=0x557224014d30 "block.c", line=2240, function=) at assert.c:92 + #3 0x00007fa1048aae96 in __GI___assert_fail + (assertion=assertion@entry=0x5572240b44d8 "bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)", file=file@entry=0x557224014d30 "block.c", line=line@entry=2240, function=function@entry=0x5572240b5d60 <__PRETTY_FUNCTION__.31620> "bdrv_replace_child_noperm") at assert.c:101 + #4 0x0000557223e631f8 in bdrv_replace_child_noperm (child=0x557225b9c980, new_bs=new_bs@entry=0x557225c42e40) at block.c:2240 + #5 0x0000557223e68be7 in bdrv_replace_node (from=0x557226951a60, to=0x557225c42e40, errp=0x5572247d6138 ) at block.c:4196 + #6 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1731 + #7 0x0000557223d069c4 in external_snapshot_abort (common=0x557225d7e170) at blockdev.c:1717 + #8 0x0000557223d09013 in qmp_transaction (dev_list=, has_props=, props=0x557225cc7d70, errp=errp@entry=0x7ffe704c0c98) at blockdev.c:2360 + #9 0x0000557223e32085 in qmp_marshal_transaction (args=, ret=, errp=0x7ffe704c0d08) at qapi/qapi-commands-transaction.c:44 + #10 0x0000557223ee798c in do_qmp_dispatch (errp=0x7ffe704c0d00, allow_oob=, request=, cmds=0x5572247d3cc0 ) at qapi/qmp-dispatch.c:132 + #11 0x0000557223ee798c in qmp_dispatch (cmds=0x5572247d3cc0 , request=, allow_oob=) at qapi/qmp-dispatch.c:175 + #12 0x0000557223e06141 in monitor_qmp_dispatch (mon=0x557225c69ff0, req=) at monitor/qmp.c:120 + #13 0x0000557223e0678a in monitor_qmp_bh_dispatcher (data=) at monitor/qmp.c:209 + #14 0x0000557223f2f366 in aio_bh_call 
(bh=0x557225b9dc60) at util/async.c:117 + #15 0x0000557223f2f366 in aio_bh_poll (ctx=ctx@entry=0x557225b9c840) at util/async.c:117 + #16 0x0000557223f32754 in aio_dispatch (ctx=0x557225b9c840) at util/aio-posix.c:459 + #17 0x0000557223f2f242 in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 + #18 0x00007fa10913467d in g_main_dispatch (context=0x557225c28e80) at gmain.c:3176 + #19 0x00007fa10913467d in g_main_context_dispatch (context=context@entry=0x557225c28e80) at gmain.c:3829 + #20 0x0000557223f31808 in glib_pollfds_poll () at util/main-loop.c:219 + #21 0x0000557223f31808 in os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #22 0x0000557223f31808 in main_loop_wait (nonblocking=) at util/main-loop.c:518 + #23 0x0000557223d13201 in main_loop () at vl.c:1828 + #24 0x0000557223bbfb82 in main (argc=, argv=, envp=) at vl.c:4504 + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1779036 +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +--- + blockdev.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/blockdev.c b/blockdev.c +index 5088541591..79112be2e6 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1774,6 +1774,8 @@ static void external_snapshot_abort(BlkActionState *common) + if (state->new_bs) { + if (state->overlay_appended) { + AioContext *aio_context; ++ AioContext *tmp_context; ++ int ret; + + aio_context = bdrv_get_aio_context(state->old_bs); + aio_context_acquire(aio_context); +@@ -1781,6 +1783,25 @@ static void external_snapshot_abort(BlkActionState *common) + bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() + close state->old_bs; we need it */ + bdrv_set_backing_hd(state->new_bs, NULL, &error_abort); ++ ++ /* ++ * The call to bdrv_set_backing_hd() above returns state->old_bs to ++ * the main AioContext. As we're still going to be using it, return ++ * it to the AioContext it was before. 
++ */ ++ tmp_context = bdrv_get_aio_context(state->old_bs); ++ if (aio_context != tmp_context) { ++ aio_context_release(aio_context); ++ aio_context_acquire(tmp_context); ++ ++ ret = bdrv_try_set_aio_context(state->old_bs, ++ aio_context, NULL); ++ assert(ret == 0); ++ ++ aio_context_release(tmp_context); ++ aio_context_acquire(aio_context); ++ } ++ + bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ + +-- +2.27.0 + diff --git a/blockdev-backup-utilize-do_backup_common.patch b/blockdev-backup-utilize-do_backup_common.patch new file mode 100644 index 0000000000000000000000000000000000000000..6827b221978c2646d97f15e62b42b8eb34282bea --- /dev/null +++ b/blockdev-backup-utilize-do_backup_common.patch @@ -0,0 +1,105 @@ +From e5456acf2332efd0ed6106eb13cf24e6bca1ee64 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 16:35:52 -0400 +Subject: [PATCH] blockdev-backup: utilize do_backup_common + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190709232550.10724-4-jsnow@redhat.com +Signed-off-by: John Snow +--- + blockdev.c | 65 +++++------------------------------------------------- + 1 file changed, 6 insertions(+), 59 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index a29838a1c8..aa15ed1f00 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3668,78 +3668,25 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, + { + BlockDriverState *bs; + BlockDriverState *target_bs; +- Error *local_err = NULL; +- BdrvDirtyBitmap *bmap = NULL; + AioContext *aio_context; +- BlockJob *job = NULL; +- int job_flags = JOB_DEFAULT; +- int ret; +- +- if (!backup->has_speed) { +- backup->speed = 0; +- } +- if (!backup->has_on_source_error) { +- backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT; +- } +- if (!backup->has_on_target_error) { +- backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT; +- } +- if (!backup->has_job_id) { +- backup->job_id = NULL; +- } +- if 
(!backup->has_auto_finalize) { +- backup->auto_finalize = true; +- } +- if (!backup->has_auto_dismiss) { +- backup->auto_dismiss = true; +- } +- if (!backup->has_compress) { +- backup->compress = false; +- } ++ BlockJob *job; + + bs = bdrv_lookup_bs(backup->device, backup->device, errp); + if (!bs) { + return NULL; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); + if (!target_bs) { +- goto out; ++ return NULL; + } + +- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); +- if (ret < 0) { +- goto out; +- } ++ aio_context = bdrv_get_aio_context(bs); ++ aio_context_acquire(aio_context); + +- if (backup->has_bitmap) { +- bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); +- if (!bmap) { +- error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); +- goto out; +- } +- if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { +- goto out; +- } +- } ++ job = do_backup_common(qapi_BlockdevBackup_base(backup), ++ bs, target_bs, aio_context, txn, errp); + +- if (!backup->auto_finalize) { +- job_flags |= JOB_MANUAL_FINALIZE; +- } +- if (!backup->auto_dismiss) { +- job_flags |= JOB_MANUAL_DISMISS; +- } +- job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, +- backup->sync, bmap, backup->compress, +- backup->on_source_error, backup->on_target_error, +- job_flags, NULL, NULL, txn, &local_err); +- if (local_err != NULL) { +- error_propagate(errp, local_err); +- } +-out: + aio_context_release(aio_context); + return job; + } +-- +2.27.0 + diff --git a/blockdev-fix-coding-style-issues-in-drive_backup_pre.patch b/blockdev-fix-coding-style-issues-in-drive_backup_pre.patch new file mode 100644 index 0000000000000000000000000000000000000000..e915b05a415c2e2fc76ac0e58be7cc819f457533 --- /dev/null +++ b/blockdev-fix-coding-style-issues-in-drive_backup_pre.patch @@ -0,0 +1,44 @@ +From ffbf1e237d0311512c411e195278e69d710fb9cf Mon Sep 17 
00:00:00 2001 +From: Sergio Lopez +Date: Wed, 8 Jan 2020 15:31:31 +0100 +Subject: [PATCH] blockdev: fix coding style issues in drive_backup_prepare + +Fix a couple of minor coding style issues in drive_backup_prepare. + +Signed-off-by: Sergio Lopez +Reviewed-by: Max Reitz +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + blockdev.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 4435795b6d..99b1cafb8f 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3597,7 +3597,7 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + + if (!backup->has_format) { + backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? +- NULL : (char*) bs->drv->format_name; ++ NULL : (char *) bs->drv->format_name; + } + + /* Early check to avoid creating target */ +@@ -3607,8 +3607,10 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + + flags = bs->open_flags | BDRV_O_RDWR; + +- /* See if we have a backing HD we can use to create our new image +- * on top of. */ ++ /* ++ * See if we have a backing HD we can use to create our new image ++ * on top of. ++ */ + if (backup->sync == MIRROR_SYNC_MODE_TOP) { + source = backing_bs(bs); + if (!source) { +-- +2.27.0 + diff --git a/blockdev-honor-bdrv_try_set_aio_context-context-requ.patch b/blockdev-honor-bdrv_try_set_aio_context-context-requ.patch new file mode 100644 index 0000000000000000000000000000000000000000..970057179e504e1961c766d0a68107816c750721 --- /dev/null +++ b/blockdev-honor-bdrv_try_set_aio_context-context-requ.patch @@ -0,0 +1,191 @@ +From 64c6b3b911f65c19f3a235c8394f5db894c1ee6a Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Wed, 8 Jan 2020 15:31:34 +0100 +Subject: [PATCH] blockdev: honor bdrv_try_set_aio_context() context + requirements + +bdrv_try_set_aio_context() requires that the old context is held, and +the new context is not held. Fix all the occurrences where it's not +done this way. 
+ +Suggested-by: Max Reitz +Signed-off-by: Sergio Lopez +Signed-off-by: Kevin Wolf +--- + blockdev.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 60 insertions(+), 8 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index d3309c205a..5088541591 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1578,6 +1578,7 @@ static void external_snapshot_prepare(BlkActionState *common, + DO_UPCAST(ExternalSnapshotState, common, common); + TransactionAction *action = common->action; + AioContext *aio_context; ++ AioContext *old_context; + int ret; + + /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar +@@ -1718,7 +1719,16 @@ static void external_snapshot_prepare(BlkActionState *common, + goto out; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(state->new_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ + ret = bdrv_try_set_aio_context(state->new_bs, aio_context, errp); ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (ret < 0) { + goto out; + } +@@ -1818,11 +1828,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) + BlockDriverState *target_bs; + BlockDriverState *source = NULL; + AioContext *aio_context; ++ AioContext *old_context; + QDict *options; + Error *local_err = NULL; + int flags; + int64_t size; + bool set_backing_hd = false; ++ int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; +@@ -1911,6 +1923,21 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp) + goto out; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ bdrv_unref(target_bs); ++ aio_context_release(old_context); ++ return; ++ } ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (set_backing_hd) { + bdrv_set_backing_hd(target_bs, source, &local_err); + if (local_err) { +@@ -1990,6 +2017,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; ++ AioContext *old_context; ++ int ret; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; +@@ -2004,7 +2033,18 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + return; + } + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. 
*/ + aio_context = bdrv_get_aio_context(bs); ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_acquire(old_context); ++ ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ aio_context_release(old_context); ++ return; ++ } ++ ++ aio_context_release(old_context); + aio_context_acquire(aio_context); + state->bs = bs; + +@@ -3562,7 +3602,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, + BlockJob *job = NULL; + BdrvDirtyBitmap *bmap = NULL; + int job_flags = JOB_DEFAULT; +- int ret; + + if (!backup->has_speed) { + backup->speed = 0; +@@ -3586,11 +3625,6 @@ static BlockJob *do_backup_common(BackupCommon *backup, + backup->compress = false; + } + +- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); +- if (ret < 0) { +- return NULL; +- } +- + if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || + (backup->sync == MIRROR_SYNC_MODE_INCREMENTAL)) { + /* done before desugaring 'incremental' to print the right message */ +@@ -3802,6 +3836,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + BlockDriverState *bs; + BlockDriverState *source, *target_bs; + AioContext *aio_context; ++ AioContext *old_context; + BlockMirrorBackingMode backing_mode; + Error *local_err = NULL; + QDict *options = NULL; +@@ -3914,12 +3949,22 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + (arg->mode == NEW_IMAGE_MODE_EXISTING || + !bdrv_has_zero_init(target_bs))); + ++ ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(target_bs); ++ aio_context_release(aio_context); ++ aio_context_acquire(old_context); ++ + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); +- goto out; ++ aio_context_release(old_context); ++ return; + } + ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + blockdev_mirror_common(arg->has_job_id ? 
arg->job_id : NULL, bs, target_bs, + arg->has_replaces, arg->replaces, arg->sync, + backing_mode, zero_target, +@@ -3961,6 +4006,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; ++ AioContext *old_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; + Error *local_err = NULL; + bool zero_target; +@@ -3978,10 +4024,16 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + + zero_target = (sync == MIRROR_SYNC_MODE_FULL); + ++ /* Honor bdrv_try_set_aio_context() context acquisition requirements. */ ++ old_context = bdrv_get_aio_context(target_bs); + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); ++ aio_context_acquire(old_context); + + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ ++ aio_context_release(old_context); ++ aio_context_acquire(aio_context); ++ + if (ret < 0) { + goto out; + } +-- +2.27.0 + diff --git a/blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch b/blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch new file mode 100644 index 0000000000000000000000000000000000000000..84e29fffabed38135f24a1e83cbe12538481dbac --- /dev/null +++ b/blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch @@ -0,0 +1,131 @@ +From 6d89e4923e9c341975dbfdd2bae153ba367a1b79 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Wed, 8 Jan 2020 15:31:33 +0100 +Subject: [PATCH] blockdev: unify qmp_blockdev_backup and blockdev-backup + transaction paths + +Issuing a blockdev-backup from qmp_blockdev_backup takes a slightly +different path than when it's issued from a transaction. In the code, +this is manifested as some redundancy between do_blockdev_backup() and +blockdev_backup_prepare(). 
+ +This change unifies both paths, merging do_blockdev_backup() and +blockdev_backup_prepare(), and changing qmp_blockdev_backup() to +create a transaction instead of calling do_backup_common() directly. + +As a side-effect, now qmp_blockdev_backup() is executed inside a +drained section, as it happens when creating a blockdev-backup +transaction. This change is visible from the user's perspective, as +the job gets paused and immediately resumed before starting the actual +work. + +Signed-off-by: Sergio Lopez +Reviewed-by: Max Reitz +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + blockdev.c | 60 ++++++++++++------------------------------------------ + 1 file changed, 13 insertions(+), 47 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 7016054688..d3309c205a 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1983,16 +1983,13 @@ typedef struct BlockdevBackupState { + BlockJob *job; + } BlockdevBackupState; + +-static BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, +- Error **errp); +- + static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + { + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + BlockdevBackup *backup; +- BlockDriverState *bs, *target; ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; + AioContext *aio_context; +- Error *local_err = NULL; + + assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); + backup = common->action->u.blockdev_backup.data; +@@ -2002,8 +1999,8 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + return; + } + +- target = bdrv_lookup_bs(backup->target, backup->target, errp); +- if (!target) { ++ target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); ++ if (!target_bs) { + return; + } + +@@ -2014,13 +2011,10 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) + /* Paired with .clean() */ + bdrv_drained_begin(state->bs); + +- state->job = 
do_blockdev_backup(backup, common->block_job_txn, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- goto out; +- } ++ state->job = do_backup_common(qapi_BlockdevBackup_base(backup), ++ bs, target_bs, aio_context, ++ common->block_job_txn, errp); + +-out: + aio_context_release(aio_context); + } + +@@ -3672,41 +3666,13 @@ XDbgBlockGraph *qmp_x_debug_query_block_graph(Error **errp) + return bdrv_get_xdbg_block_graph(errp); + } + +-BlockJob *do_blockdev_backup(BlockdevBackup *backup, JobTxn *txn, +- Error **errp) ++void qmp_blockdev_backup(BlockdevBackup *backup, Error **errp) + { +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- AioContext *aio_context; +- BlockJob *job; +- +- bs = bdrv_lookup_bs(backup->device, backup->device, errp); +- if (!bs) { +- return NULL; +- } +- +- target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); +- if (!target_bs) { +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- job = do_backup_common(qapi_BlockdevBackup_base(backup), +- bs, target_bs, aio_context, txn, errp); +- +- aio_context_release(aio_context); +- return job; +-} +- +-void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) +-{ +- BlockJob *job; +- job = do_blockdev_backup(arg, NULL, errp); +- if (job) { +- job_start(&job->job); +- } ++ TransactionAction action = { ++ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP, ++ .u.blockdev_backup.data = backup, ++ }; ++ blockdev_do_action(&action, errp); + } + + /* Parameter check and block job starting for drive mirroring. 
+-- +2.27.0 + diff --git a/blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch b/blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch new file mode 100644 index 0000000000000000000000000000000000000000..aefa05e921b7d66a09995716f8176367817d434c --- /dev/null +++ b/blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch @@ -0,0 +1,406 @@ +From 952f7f53cdd4320d1a0328481fa578dd199eb1ce Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Wed, 8 Jan 2020 15:31:32 +0100 +Subject: [PATCH] blockdev: unify qmp_drive_backup and drive-backup transaction + paths + +Issuing a drive-backup from qmp_drive_backup takes a slightly +different path than when it's issued from a transaction. In the code, +this is manifested as some redundancy between do_drive_backup() and +drive_backup_prepare(). + +This change unifies both paths, merging do_drive_backup() and +drive_backup_prepare(), and changing qmp_drive_backup() to create a +transaction instead of calling do_backup_common() directly. + +As a side-effect, now qmp_drive_backup() is executed inside a drained +section, as it happens when creating a drive-backup transaction. This +change is visible from the user's perspective, as the job gets paused +and immediately resumed before starting the actual work. + +Also fix tests 141, 185 and 219 to cope with the extra +JOB_STATUS_CHANGE lines. 
+ +Signed-off-by: Sergio Lopez +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + blockdev.c | 224 +++++++++++++++++-------------------- + tests/qemu-iotests/141.out | 2 + + tests/qemu-iotests/185.out | 2 + + tests/qemu-iotests/219 | 7 +- + tests/qemu-iotests/219.out | 8 ++ + 5 files changed, 117 insertions(+), 126 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 99b1cafb8f..7016054688 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1804,39 +1804,128 @@ typedef struct DriveBackupState { + BlockJob *job; + } DriveBackupState; + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp); ++static BlockJob *do_backup_common(BackupCommon *backup, ++ BlockDriverState *bs, ++ BlockDriverState *target_bs, ++ AioContext *aio_context, ++ JobTxn *txn, Error **errp); + + static void drive_backup_prepare(BlkActionState *common, Error **errp) + { + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); +- BlockDriverState *bs; + DriveBackup *backup; ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; ++ BlockDriverState *source = NULL; + AioContext *aio_context; ++ QDict *options; + Error *local_err = NULL; ++ int flags; ++ int64_t size; ++ bool set_backing_hd = false; + + assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->u.drive_backup.data; + ++ if (!backup->has_mode) { ++ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; ++ } ++ + bs = bdrv_lookup_bs(backup->device, backup->device, errp); + if (!bs) { + return; + } + ++ if (!bs->drv) { ++ error_setg(errp, "Device has no medium"); ++ return; ++ } ++ + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + + /* Paired with .clean() */ + bdrv_drained_begin(bs); + +- state->bs = bs; ++ if (!backup->has_format) { ++ backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? 
++ NULL : (char *) bs->drv->format_name; ++ } ++ ++ /* Early check to avoid creating target */ ++ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { ++ goto out; ++ } ++ ++ flags = bs->open_flags | BDRV_O_RDWR; ++ ++ /* ++ * See if we have a backing HD we can use to create our new image ++ * on top of. ++ */ ++ if (backup->sync == MIRROR_SYNC_MODE_TOP) { ++ source = backing_bs(bs); ++ if (!source) { ++ backup->sync = MIRROR_SYNC_MODE_FULL; ++ } ++ } ++ if (backup->sync == MIRROR_SYNC_MODE_NONE) { ++ source = bs; ++ flags |= BDRV_O_NO_BACKING; ++ set_backing_hd = true; ++ } ++ ++ size = bdrv_getlength(bs); ++ if (size < 0) { ++ error_setg_errno(errp, -size, "bdrv_getlength failed"); ++ goto out; ++ } ++ ++ if (backup->mode != NEW_IMAGE_MODE_EXISTING) { ++ assert(backup->format); ++ if (source) { ++ bdrv_refresh_filename(source); ++ bdrv_img_create(backup->target, backup->format, source->filename, ++ source->drv->format_name, NULL, ++ size, flags, false, &local_err); ++ } else { ++ bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, ++ size, flags, false, &local_err); ++ } ++ } + +- state->job = do_drive_backup(backup, common->block_job_txn, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + ++ options = qdict_new(); ++ qdict_put_str(options, "discard", "unmap"); ++ qdict_put_str(options, "detect-zeroes", "unmap"); ++ if (backup->format) { ++ qdict_put_str(options, "driver", backup->format); ++ } ++ ++ target_bs = bdrv_open(backup->target, NULL, options, flags, errp); ++ if (!target_bs) { ++ goto out; ++ } ++ ++ if (set_backing_hd) { ++ bdrv_set_backing_hd(target_bs, source, &local_err); ++ if (local_err) { ++ goto unref; ++ } ++ } ++ ++ state->bs = bs; ++ ++ state->job = do_backup_common(qapi_DriveBackup_base(backup), ++ bs, target_bs, aio_context, ++ common->block_job_txn, errp); ++ ++unref: ++ bdrv_unref(target_bs); + out: + aio_context_release(aio_context); + } +@@ -3564,126 +3653,13 @@ static 
BlockJob *do_backup_common(BackupCommon *backup, + return job; + } + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp) +-{ +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- BlockDriverState *source = NULL; +- BlockJob *job = NULL; +- AioContext *aio_context; +- QDict *options; +- Error *local_err = NULL; +- int flags; +- int64_t size; +- bool set_backing_hd = false; +- +- if (!backup->has_mode) { +- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; +- } +- +- bs = bdrv_lookup_bs(backup->device, backup->device, errp); +- if (!bs) { +- return NULL; +- } +- +- if (!bs->drv) { +- error_setg(errp, "Device has no medium"); +- return NULL; +- } +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- +- if (!backup->has_format) { +- backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? +- NULL : (char *) bs->drv->format_name; +- } +- +- /* Early check to avoid creating target */ +- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { +- goto out; +- } +- +- flags = bs->open_flags | BDRV_O_RDWR; +- +- /* +- * See if we have a backing HD we can use to create our new image +- * on top of. 
+- */ +- if (backup->sync == MIRROR_SYNC_MODE_TOP) { +- source = backing_bs(bs); +- if (!source) { +- backup->sync = MIRROR_SYNC_MODE_FULL; +- } +- } +- if (backup->sync == MIRROR_SYNC_MODE_NONE) { +- source = bs; +- flags |= BDRV_O_NO_BACKING; +- set_backing_hd = true; +- } +- +- size = bdrv_getlength(bs); +- if (size < 0) { +- error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; +- } +- +- if (backup->mode != NEW_IMAGE_MODE_EXISTING) { +- assert(backup->format); +- if (source) { +- bdrv_refresh_filename(source); +- bdrv_img_create(backup->target, backup->format, source->filename, +- source->drv->format_name, NULL, +- size, flags, false, &local_err); +- } else { +- bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, +- size, flags, false, &local_err); +- } +- } +- +- if (local_err) { +- error_propagate(errp, local_err); +- goto out; +- } +- +- options = qdict_new(); +- qdict_put_str(options, "discard", "unmap"); +- qdict_put_str(options, "detect-zeroes", "unmap"); +- if (backup->format) { +- qdict_put_str(options, "driver", backup->format); +- } +- +- target_bs = bdrv_open(backup->target, NULL, options, flags, errp); +- if (!target_bs) { +- goto out; +- } +- +- if (set_backing_hd) { +- bdrv_set_backing_hd(target_bs, source, &local_err); +- if (local_err) { +- goto unref; +- } +- } +- +- job = do_backup_common(qapi_DriveBackup_base(backup), +- bs, target_bs, aio_context, txn, errp); +- +-unref: +- bdrv_unref(target_bs); +-out: +- aio_context_release(aio_context); +- return job; +-} +- +-void qmp_drive_backup(DriveBackup *arg, Error **errp) ++void qmp_drive_backup(DriveBackup *backup, Error **errp) + { +- +- BlockJob *job; +- job = do_drive_backup(arg, NULL, errp); +- if (job) { +- job_start(&job->job); +- } ++ TransactionAction action = { ++ .type = TRANSACTION_ACTION_KIND_DRIVE_BACKUP, ++ .u.drive_backup.data = backup, ++ }; ++ blockdev_do_action(&action, errp); + } + + BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) 
+diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out +index 4d71d9dcae..07e0ec66d7 100644 +--- a/tests/qemu-iotests/141.out ++++ b/tests/qemu-iotests/141.out +@@ -10,6 +10,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m. + Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "Node drv0 is in use"}} + {"return": {}} +diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out +index ddfbf3c765..a233be7f58 100644 +--- a/tests/qemu-iotests/185.out ++++ b/tests/qemu-iotests/185.out +@@ -51,6 +51,8 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 l + Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 cluster_size=65536 lazy_refcounts=off refcount_bits=16 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", 
"id": "disk"}} + {"return": {}} + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +diff --git a/tests/qemu-iotests/219 b/tests/qemu-iotests/219 +index e0c51662c0..655f54d881 100755 +--- a/tests/qemu-iotests/219 ++++ b/tests/qemu-iotests/219 +@@ -63,7 +63,7 @@ def test_pause_resume(vm): + # logged immediately + iotests.log(vm.qmp('query-jobs')) + +-def test_job_lifecycle(vm, job, job_args, has_ready=False): ++def test_job_lifecycle(vm, job, job_args, has_ready=False, is_mirror=False): + global img_size + + iotests.log('') +@@ -135,6 +135,9 @@ def test_job_lifecycle(vm, job, job_args, has_ready=False): + iotests.log('Waiting for PENDING state...') + iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) + iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) ++ if is_mirror: ++ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) ++ iotests.log(iotests.filter_qmp_event(vm.event_wait('JOB_STATUS_CHANGE'))) + + if not job_args.get('auto-finalize', True): + # PENDING state: +@@ -218,7 +221,7 @@ with iotests.FilePath('disk.img') as disk_path, \ + + for auto_finalize in [True, False]: + for auto_dismiss in [True, False]: +- test_job_lifecycle(vm, 'drive-backup', job_args={ ++ test_job_lifecycle(vm, 'drive-backup', is_mirror=True, job_args={ + 'device': 'drive0-node', + 'target': copy_path, + 'sync': 'full', +diff --git a/tests/qemu-iotests/219.out b/tests/qemu-iotests/219.out +index 8ebd3fee60..0ea5d0b9d5 100644 +--- a/tests/qemu-iotests/219.out ++++ b/tests/qemu-iotests/219.out +@@ -135,6 +135,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... 
++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +@@ -186,6 +188,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... ++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "concluded"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +@@ -245,6 +249,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... 
++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} +@@ -304,6 +310,8 @@ Pause/resume in RUNNING + {"return": {}} + + Waiting for PENDING state... ++{"data": {"id": "job0", "status": "paused"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"data": {"id": "job0", "status": "running"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "waiting"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"data": {"id": "job0", "status": "pending"}, "event": "JOB_STATUS_CHANGE", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} + {"return": [{"current-progress": 4194304, "id": "job0", "status": "pending", "total-progress": 4194304, "type": "backup"}]} +-- +2.27.0 + diff --git a/blockjob-Fix-crash-with-IOthread-when-block-commit-a.patch b/blockjob-Fix-crash-with-IOthread-when-block-commit-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..2efef7276e0463b863265039dccdc0afd3aee834 --- /dev/null +++ b/blockjob-Fix-crash-with-IOthread-when-block-commit-a.patch @@ -0,0 +1,114 @@ +From e37cda3452309d147f1f7aec3c74249001e3db0c Mon Sep 17 00:00:00 2001 +From: Michael Qiu +Date: Wed, 12 May 2021 21:54:37 +0800 +Subject: [PATCH] blockjob: Fix crash with IOthread when block commit 
after + snapshot + +Currently, if guest has workloads, IO thread will acquire aio_context +lock before do io_submit, it leads to segmentation fault when do block commit +after snapshot. Just like below: + +Program received signal SIGSEGV, Segmentation fault. + +[Switching to Thread 0x7f7c7d91f700 (LWP 99907)] +0x00005576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437 +1437 ../block/mirror.c: No such file or directory. +(gdb) p s->job +$17 = (MirrorBlockJob *) 0x0 +(gdb) p s->stop +$18 = false + +Call trace of IO thread: +0 0x00005576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437 +1 0x00005576d0f7f3ab in bdrv_driver_pwritev at ../block/io.c:1174 +2 0x00005576d0f8139d in bdrv_aligned_pwritev at ../block/io.c:1988 +3 0x00005576d0f81b65 in bdrv_co_pwritev_part at ../block/io.c:2156 +4 0x00005576d0f8e6b7 in blk_do_pwritev_part at ../block/block-backend.c:1260 +5 0x00005576d0f8e84d in blk_aio_write_entry at ../block/block-backend.c:1476 +... + +Switch to qemu main thread: +0 0x00007f903be704ed in __lll_lock_wait at +/lib/../lib64/libpthread.so.0 +1 0x00007f903be6bde6 in _L_lock_941 at /lib/../lib64/libpthread.so.0 +2 0x00007f903be6bcdf in pthread_mutex_lock at +/lib/../lib64/libpthread.so.0 +3 0x0000564b21456889 in qemu_mutex_lock_impl at +../util/qemu-thread-posix.c:79 +4 0x0000564b213af8a5 in block_job_add_bdrv at ../blockjob.c:224 +5 0x0000564b213b00ad in block_job_create at ../blockjob.c:440 +6 0x0000564b21357c0a in mirror_start_job at ../block/mirror.c:1622 +7 0x0000564b2135a9af in commit_active_start at ../block/mirror.c:1867 +8 0x0000564b2133d132 in qmp_block_commit at ../blockdev.c:2768 +9 0x0000564b2141fef3 in qmp_marshal_block_commit at +qapi/qapi-commands-block-core.c:346 +10 0x0000564b214503c9 in do_qmp_dispatch_bh at +../qapi/qmp-dispatch.c:110 +11 0x0000564b21451996 in aio_bh_poll at ../util/async.c:164 +12 0x0000564b2146018e in aio_dispatch at ../util/aio-posix.c:381 +13 0x0000564b2145187e in aio_ctx_dispatch at ../util/async.c:306 
+14 0x00007f9040239049 in g_main_context_dispatch at +/lib/../lib64/libglib-2.0.so.0 +15 0x0000564b21447368 in main_loop_wait at ../util/main-loop.c:232 +16 0x0000564b21447368 in main_loop_wait at ../util/main-loop.c:255 +17 0x0000564b21447368 in main_loop_wait at ../util/main-loop.c:531 +18 0x0000564b212304e1 in qemu_main_loop at ../softmmu/runstate.c:721 +19 0x0000564b20f7975e in main at ../softmmu/main.c:50 + +In IO thread when do bdrv_mirror_top_pwritev, the job is NULL, and stop field +is false, this means the MirrorBDSOpaque "s" object has not been initialized +yet, and this object is initialized by block_job_create(), but the initialize +process is stuck in acquiring the lock. + +In this situation, IO thread come to bdrv_mirror_top_pwritev(), which means that +mirror-top node is already inserted into block graph, but its bs->opaque->job +is not initialized. + +The root cause is that qemu main thread do release/acquire when hold the lock, +at the same time, IO thread get the lock after release stage, and the crash +occurred. + +Actually, in this situation, job->job.aio_context will not equal to +qemu_get_aio_context(), and will be the same as bs->aio_context, +thus, no need to release the lock, because bdrv_root_attach_child() +will not change the context. + +This patch fixes this issue. 
+ +Fixes: 132ada80 "block: Adjust AioContexts when attaching nodes" + +Signed-off-by: Michael Qiu +Message-Id: <20210203024059.52683-1-08005325@163.com> +Signed-off-by: Kevin Wolf +--- + blockjob.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/blockjob.c b/blockjob.c +index 74abb97bfd..72865a4a6e 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -223,14 +223,18 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) + { + BdrvChild *c; ++ bool need_context_ops; + + bdrv_ref(bs); +- if (job->job.aio_context != qemu_get_aio_context()) { ++ ++ need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context; ++ ++ if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { + aio_context_release(job->job.aio_context); + } + c = bdrv_root_attach_child(bs, name, &child_job, job->job.aio_context, + perm, shared_perm, job, errp); +- if (job->job.aio_context != qemu_get_aio_context()) { ++ if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) { + aio_context_acquire(job->job.aio_context); + } + if (c == NULL) { +-- +2.27.0 + diff --git a/blockjob-update-nodes-head-while-removing-all-bdrv.patch b/blockjob-update-nodes-head-while-removing-all-bdrv.patch new file mode 100644 index 0000000000000000000000000000000000000000..36cedc77f7d38b124263a6f5d09e5f1dc97de5b8 --- /dev/null +++ b/blockjob-update-nodes-head-while-removing-all-bdrv.patch @@ -0,0 +1,61 @@ +From 86b0f4022bb43b16979ba5300e8d40a1e6d44b79 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Wed, 11 Sep 2019 12:03:16 +0200 +Subject: [PATCH] blockjob: update nodes head while removing all bdrv + +block_job_remove_all_bdrv() iterates through job->nodes, calling +bdrv_root_unref_child() for each entry. 
The call to the latter may +reach child_job_[can_]set_aio_ctx(), which will also attempt to +traverse job->nodes, potentially finding entries that where freed +on previous iterations. + +To avoid this situation, update job->nodes head on each iteration to +ensure that already freed entries are no longer linked to the list. + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1746631 +Signed-off-by: Sergio Lopez +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20190911100316.32282-1-mreitz@redhat.com +Reviewed-by: Sergio Lopez +Signed-off-by: Max Reitz +(cherry picked from commit d876bf676f5e7c6aa9ac64555e48cba8734ecb2f) +Signed-off-by: Michael Roth +--- + blockjob.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/blockjob.c b/blockjob.c +index 20b7f557da..74abb97bfd 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -186,14 +186,23 @@ static const BdrvChildRole child_job = { + + void block_job_remove_all_bdrv(BlockJob *job) + { +- GSList *l; +- for (l = job->nodes; l; l = l->next) { ++ /* ++ * bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(), ++ * which will also traverse job->nodes, so consume the list one by ++ * one to make sure that such a concurrent access does not attempt ++ * to process an already freed BdrvChild. 
++ */ ++ while (job->nodes) { ++ GSList *l = job->nodes; + BdrvChild *c = l->data; ++ ++ job->nodes = l->next; ++ + bdrv_op_unblock_all(c->bs, job->blocker); + bdrv_root_unref_child(c); ++ ++ g_slist_free_1(l); + } +- g_slist_free(job->nodes); +- job->nodes = NULL; + } + + bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +-- +2.23.0 diff --git a/bootp-check-bootp_input-buffer-size.patch b/bootp-check-bootp_input-buffer-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..3c4829eb123bc2a850da3660d22b93b641b6e73f --- /dev/null +++ b/bootp-check-bootp_input-buffer-size.patch @@ -0,0 +1,36 @@ +From ab454ca5f45a842b2517a0f4eb786b6ea3019d5a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 4 Jun 2021 16:15:14 +0400 +Subject: [PATCH 3/6] bootp: check bootp_input buffer size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fixes: CVE-2021-3592 +Fixes: https://gitlab.freedesktop.org/slirp/libslirp/-/issues/44 + +Signed-off-by: Marc-André Lureau +Signed-off-by: imxcc +--- + slirp/src/bootp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +index 57543271..57891871 100644 +--- a/slirp/src/bootp.c ++++ b/slirp/src/bootp.c +@@ -366,9 +366,9 @@ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); + + void bootp_input(struct mbuf *m) + { +- struct bootp_t *bp = mtod(m, struct bootp_t *); ++ struct bootp_t *bp = mtod_check(m, sizeof(struct bootp_t)); + +- if (bp->bp_op == BOOTP_REQUEST) { ++ if (bp && bp->bp_op == BOOTP_REQUEST) { + bootp_reply(m->slirp, bp, m_end(m)); + } + } +-- +2.27.0 + diff --git a/bootp-limit-vendor-specific-area-to-input-packet-mem.patch b/bootp-limit-vendor-specific-area-to-input-packet-mem.patch new file mode 100644 index 0000000000000000000000000000000000000000..07fe5cfd429f37a224d8910cecd7690c537d84c6 --- /dev/null +++ 
b/bootp-limit-vendor-specific-area-to-input-packet-mem.patch @@ -0,0 +1,161 @@ +From 3369988416bd98e97dc3d0400af165c0d9e536e0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 4 Jun 2021 19:25:28 +0400 +Subject: [PATCH 2/6] bootp: limit vendor-specific area to input packet memory + buffer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +sizeof(bootp_t) currently holds DHCP_OPT_LEN. Remove this optional field +from the structure, to help with the following patch checking for +minimal header size. Modify the bootp_reply() function to take the +buffer boundaries and avoiding potential buffer overflow. + +Related to CVE-2021-3592. + +https://gitlab.freedesktop.org/slirp/libslirp/-/issues/44 + +Signed-off-by: Marc-André Lureau +Signed-off-by: imxcc +--- + slirp/src/bootp.c | 26 +++++++++++++++----------- + slirp/src/bootp.h | 2 +- + slirp/src/mbuf.c | 5 +++++ + slirp/src/mbuf.h | 1 + + 4 files changed, 22 insertions(+), 12 deletions(-) + +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +index 3f9ce255..57543271 100644 +--- a/slirp/src/bootp.c ++++ b/slirp/src/bootp.c +@@ -92,21 +92,22 @@ found: + return bc; + } + +-static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, ++static void dhcp_decode(const struct bootp_t *bp, ++ const uint8_t *bp_end, ++ int *pmsg_type, + struct in_addr *preq_addr) + { +- const uint8_t *p, *p_end; ++ const uint8_t *p; + int len, tag; + + *pmsg_type = 0; + preq_addr->s_addr = htonl(0L); + + p = bp->bp_vend; +- p_end = p + DHCP_OPT_LEN; + if (memcmp(p, rfc1533_cookie, 4) != 0) + return; + p += 4; +- while (p < p_end) { ++ while (p < bp_end) { + tag = p[0]; + if (tag == RFC1533_PAD) { + p++; +@@ -114,10 +115,10 @@ static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, + break; + } else { + p++; +- if (p >= p_end) ++ if (p >= bp_end) + break; + len = *p++; +- if (p + len > p_end) { ++ if (p + len > bp_end) { + break; + } + DPRINTF("dhcp: tag=%d 
len=%d\n", tag, len); +@@ -144,7 +145,9 @@ static void dhcp_decode(const struct bootp_t *bp, int *pmsg_type, + } + } + +-static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) ++static void bootp_reply(Slirp *slirp, ++ const struct bootp_t *bp, ++ const uint8_t *bp_end) + { + BOOTPClient *bc = NULL; + struct mbuf *m; +@@ -157,7 +160,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) + uint8_t client_ethaddr[ETH_ALEN]; + + /* extract exact DHCP msg type */ +- dhcp_decode(bp, &dhcp_msg_type, &preq_addr); ++ dhcp_decode(bp, bp_end, &dhcp_msg_type, &preq_addr); + DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); + if (preq_addr.s_addr != htonl(0L)) + DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); +@@ -179,9 +182,10 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) + return; + } + m->m_data += IF_MAXLINKHDR; ++ m_inc(m, sizeof(struct bootp_t) + DHCP_OPT_LEN); + rbp = (struct bootp_t *)m->m_data; + m->m_data += sizeof(struct udpiphdr); +- memset(rbp, 0, sizeof(struct bootp_t)); ++ memset(rbp, 0, sizeof(struct bootp_t) + DHCP_OPT_LEN); + + if (dhcp_msg_type == DHCPDISCOVER) { + if (preq_addr.s_addr != htonl(0L)) { +@@ -235,7 +239,7 @@ static void bootp_reply(Slirp *slirp, const struct bootp_t *bp) + rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ + + q = rbp->bp_vend; +- end = (uint8_t *)&rbp[1]; ++ end = rbp->bp_vend + DHCP_OPT_LEN; + memcpy(q, rfc1533_cookie, 4); + q += 4; + +@@ -365,6 +369,6 @@ void bootp_input(struct mbuf *m) + struct bootp_t *bp = mtod(m, struct bootp_t *); + + if (bp->bp_op == BOOTP_REQUEST) { +- bootp_reply(m->slirp, bp); ++ bootp_reply(m->slirp, bp, m_end(m)); + } + } +diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h +index 03ece9bf..0d20a944 100644 +--- a/slirp/src/bootp.h ++++ b/slirp/src/bootp.h +@@ -114,7 +114,7 @@ struct bootp_t { + uint8_t bp_hwaddr[16]; + uint8_t bp_sname[64]; + uint8_t bp_file[128]; +- uint8_t bp_vend[DHCP_OPT_LEN]; ++ uint8_t 
bp_vend[]; + }; + + typedef struct { +diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c +index 6d0653ed..7db07c08 100644 +--- a/slirp/src/mbuf.c ++++ b/slirp/src/mbuf.c +@@ -233,3 +233,8 @@ void *mtod_check(struct mbuf *m, size_t len) + + return NULL; + } ++ ++void *m_end(struct mbuf *m) ++{ ++ return m->m_data + m->m_len; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +index 2015e323..a9752a36 100644 +--- a/slirp/src/mbuf.h ++++ b/slirp/src/mbuf.h +@@ -119,6 +119,7 @@ void m_adj(struct mbuf *, int); + int m_copy(struct mbuf *, struct mbuf *, int, int); + struct mbuf *dtom(Slirp *, void *); + void *mtod_check(struct mbuf *, size_t len); ++void *m_end(struct mbuf *); + + static inline void ifs_init(struct mbuf *ifm) + { +-- +2.27.0 + diff --git a/bt-use-size_t-type-for-length-parameters-instead-of-.patch b/bt-use-size_t-type-for-length-parameters-instead-of-.patch new file mode 100644 index 0000000000000000000000000000000000000000..2005979aec4f4401b512bd0ea72d6c12493f5ea1 --- /dev/null +++ b/bt-use-size_t-type-for-length-parameters-instead-of-.patch @@ -0,0 +1,794 @@ +From f9ab92373813cfccd31f29c0d963232f65cb5f88 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Fri, 22 May 2020 12:22:26 +0800 +Subject: [PATCH] bt: use size_t type for length parameters instead of int + +From: Prasad J Pandit + +The length parameter values are not negative, thus use an unsigned +type 'size_t' for them. Many routines pass 'len' values to memcpy(3) +calls. If it was negative, it could lead to memory corruption issues. +Add check to avoid it. 
+ +Reported-by: Arash TC +Signed-off-by: Prasad J Pandit + +diff --git a/bt-host.c b/bt-host.c +index 2f8f631..b73a44d 100644 +--- a/bt-host.c ++++ b/bt-host.c +@@ -43,7 +43,7 @@ struct bt_host_hci_s { + }; + + static void bt_host_send(struct HCIInfo *hci, +- int type, const uint8_t *data, int len) ++ int type, const uint8_t *data, size_t len) + { + struct bt_host_hci_s *s = (struct bt_host_hci_s *) hci; + uint8_t pkt = type; +@@ -63,17 +63,17 @@ static void bt_host_send(struct HCIInfo *hci, + } + } + +-static void bt_host_cmd(struct HCIInfo *hci, const uint8_t *data, int len) ++static void bt_host_cmd(struct HCIInfo *hci, const uint8_t *data, size_t len) + { + bt_host_send(hci, HCI_COMMAND_PKT, data, len); + } + +-static void bt_host_acl(struct HCIInfo *hci, const uint8_t *data, int len) ++static void bt_host_acl(struct HCIInfo *hci, const uint8_t *data, size_t len) + { + bt_host_send(hci, HCI_ACLDATA_PKT, data, len); + } + +-static void bt_host_sco(struct HCIInfo *hci, const uint8_t *data, int len) ++static void bt_host_sco(struct HCIInfo *hci, const uint8_t *data, size_t len) + { + bt_host_send(hci, HCI_SCODATA_PKT, data, len); + } +diff --git a/bt-vhci.c b/bt-vhci.c +index 886e146..32ef1c5 100644 +--- a/bt-vhci.c ++++ b/bt-vhci.c +@@ -89,7 +89,7 @@ static void vhci_read(void *opaque) + } + + static void vhci_host_send(void *opaque, +- int type, const uint8_t *data, int len) ++ int type, const uint8_t *data, size_t len) + { + struct bt_vhci_s *s = (struct bt_vhci_s *) opaque; + #if 0 +@@ -112,6 +112,7 @@ static void vhci_host_send(void *opaque, + static uint8_t buf[4096]; + + buf[0] = type; ++ assert(len < sizeof(buf)); + memcpy(buf + 1, data, len); + + while (write(s->fd, buf, len + 1) < 0) +@@ -124,13 +125,13 @@ static void vhci_host_send(void *opaque, + } + + static void vhci_out_hci_packet_event(void *opaque, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + vhci_host_send(opaque, HCI_EVENT_PKT, data, len); + } + + static void 
vhci_out_hci_packet_acl(void *opaque, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + vhci_host_send(opaque, HCI_ACLDATA_PKT, data, len); + } +diff --git a/hw/bt/core.c b/hw/bt/core.c +index dfb196e..f548b3d 100644 +--- a/hw/bt/core.c ++++ b/hw/bt/core.c +@@ -44,7 +44,7 @@ static void bt_dummy_lmp_disconnect_master(struct bt_link_s *link) + } + + static void bt_dummy_lmp_acl_resp(struct bt_link_s *link, +- const uint8_t *data, int start, int len) ++ const uint8_t *data, int start, size_t len) + { + error_report("%s: stray ACL response PDU, fixme", __func__); + exit(-1); +diff --git a/hw/bt/hci-csr.c b/hw/bt/hci-csr.c +index 3d60654..f7a74c0 100644 +--- a/hw/bt/hci-csr.c ++++ b/hw/bt/hci-csr.c +@@ -103,7 +103,7 @@ static inline void csrhci_fifo_wake(struct csrhci_s *s) + } + + #define csrhci_out_packetz(s, len) memset(csrhci_out_packet(s, len), 0, len) +-static uint8_t *csrhci_out_packet(struct csrhci_s *s, int len) ++static uint8_t *csrhci_out_packet(struct csrhci_s *s, size_t len) + { + int off = s->out_start + s->out_len; + +@@ -112,14 +112,14 @@ static uint8_t *csrhci_out_packet(struct csrhci_s *s, int len) + + if (off < FIFO_LEN) { + if (off + len > FIFO_LEN && (s->out_size = off + len) > FIFO_LEN * 2) { +- error_report("%s: can't alloc %i bytes", __func__, len); ++ error_report("%s: can't alloc %zu bytes", __func__, len); + exit(-1); + } + return s->outfifo + off; + } + + if (s->out_len > s->out_size) { +- error_report("%s: can't alloc %i bytes", __func__, len); ++ error_report("%s: can't alloc %zu bytes", __func__, len); + exit(-1); + } + +@@ -127,7 +127,7 @@ static uint8_t *csrhci_out_packet(struct csrhci_s *s, int len) + } + + static inline uint8_t *csrhci_out_packet_csr(struct csrhci_s *s, +- int type, int len) ++ int type, size_t len) + { + uint8_t *ret = csrhci_out_packetz(s, len + 2); + +@@ -138,7 +138,7 @@ static inline uint8_t *csrhci_out_packet_csr(struct csrhci_s *s, + } + + static inline uint8_t 
*csrhci_out_packet_event(struct csrhci_s *s, +- int evt, int len) ++ int evt, size_t len) + { + uint8_t *ret = csrhci_out_packetz(s, + len + 1 + sizeof(struct hci_event_hdr)); +@@ -151,7 +151,7 @@ static inline uint8_t *csrhci_out_packet_event(struct csrhci_s *s, + } + + static void csrhci_in_packet_vendor(struct csrhci_s *s, int ocf, +- uint8_t *data, int len) ++ uint8_t *data, size_t len) + { + int offset; + uint8_t *rpkt; +@@ -320,18 +320,18 @@ static int csrhci_write(struct Chardev *chr, + struct csrhci_s *s = (struct csrhci_s *)chr; + int total = 0; + +- if (!s->enable) ++ if (!s->enable || len <= 0) + return 0; + + for (;;) { + int cnt = MIN(len, s->in_needed - s->in_len); +- if (cnt) { +- memcpy(s->inpkt + s->in_len, buf, cnt); +- s->in_len += cnt; +- buf += cnt; +- len -= cnt; +- total += cnt; +- } ++ assert(cnt > 0); ++ ++ memcpy(s->inpkt + s->in_len, buf, cnt); ++ s->in_len += cnt; ++ buf += cnt; ++ len -= cnt; ++ total += cnt; + + if (s->in_len < s->in_needed) { + break; +@@ -363,7 +363,7 @@ static int csrhci_write(struct Chardev *chr, + } + + static void csrhci_out_hci_packet_event(void *opaque, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + struct csrhci_s *s = (struct csrhci_s *) opaque; + uint8_t *pkt = csrhci_out_packet(s, (len + 2) & ~1); /* Align */ +@@ -375,7 +375,7 @@ static void csrhci_out_hci_packet_event(void *opaque, + } + + static void csrhci_out_hci_packet_acl(void *opaque, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + struct csrhci_s *s = (struct csrhci_s *) opaque; + uint8_t *pkt = csrhci_out_packet(s, (len + 2) & ~1); /* Align */ +diff --git a/hw/bt/hci.c b/hw/bt/hci.c +index c7958f6..9c4f957 100644 +--- a/hw/bt/hci.c ++++ b/hw/bt/hci.c +@@ -31,7 +31,7 @@ + + struct bt_hci_s { + uint8_t *(*evt_packet)(void *opaque); +- void (*evt_submit)(void *opaque, int len); ++ void (*evt_submit)(void *opaque, size_t len); + void *opaque; + uint8_t evt_buf[256]; + +@@ -61,7 +61,7 @@ struct 
bt_hci_s { + struct bt_hci_master_link_s { + struct bt_link_s *link; + void (*lmp_acl_data)(struct bt_link_s *link, +- const uint8_t *data, int start, int len); ++ const uint8_t *data, int start, size_t len); + QEMUTimer *acl_mode_timer; + } handle[HCI_HANDLES_MAX]; + uint32_t role_bmp; +@@ -433,7 +433,7 @@ static const uint8_t bt_event_reserved_mask[8] = { + }; + + +-static void null_hci_send(struct HCIInfo *hci, const uint8_t *data, int len) ++static void null_hci_send(struct HCIInfo *hci, const uint8_t *data, size_t len) + { + } + +@@ -451,13 +451,13 @@ struct HCIInfo null_hci = { + + + static inline uint8_t *bt_hci_event_start(struct bt_hci_s *hci, +- int evt, int len) ++ int evt, size_t len) + { + uint8_t *packet, mask; + int mask_byte; + + if (len > 255) { +- error_report("%s: HCI event params too long (%ib)", __func__, len); ++ error_report("%s: HCI event params too long (%zub)", __func__, len); + exit(-1); + } + +@@ -474,7 +474,7 @@ static inline uint8_t *bt_hci_event_start(struct bt_hci_s *hci, + } + + static inline void bt_hci_event(struct bt_hci_s *hci, int evt, +- void *params, int len) ++ void *params, size_t len) + { + uint8_t *packet = bt_hci_event_start(hci, evt, len); + +@@ -499,7 +499,7 @@ static inline void bt_hci_event_status(struct bt_hci_s *hci, int status) + } + + static inline void bt_hci_event_complete(struct bt_hci_s *hci, +- void *ret, int len) ++ void *ret, size_t len) + { + uint8_t *packet = bt_hci_event_start(hci, EVT_CMD_COMPLETE, + len + EVT_CMD_COMPLETE_SIZE); +@@ -1476,7 +1476,7 @@ static inline void bt_hci_event_num_comp_pkts(struct bt_hci_s *hci, + } + + static void bt_submit_hci(struct HCIInfo *info, +- const uint8_t *data, int length) ++ const uint8_t *data, size_t length) + { + struct bt_hci_s *hci = hci_from_info(info); + uint16_t cmd; +@@ -1970,7 +1970,7 @@ static void bt_submit_hci(struct HCIInfo *info, + break; + + short_hci: +- error_report("%s: HCI packet too short (%iB)", __func__, length); ++ error_report("%s: HCI 
packet too short (%zuB)", __func__, length); + bt_hci_event_status(hci, HCI_INVALID_PARAMETERS); + break; + } +@@ -1981,7 +1981,7 @@ static void bt_submit_hci(struct HCIInfo *info, + * know that a packet contained the last fragment of the SDU when the next + * SDU starts. */ + static inline void bt_hci_lmp_acl_data(struct bt_hci_s *hci, uint16_t handle, +- const uint8_t *data, int start, int len) ++ const uint8_t *data, int start, size_t len) + { + struct hci_acl_hdr *pkt = (void *) hci->acl_buf; + +@@ -1989,7 +1989,7 @@ static inline void bt_hci_lmp_acl_data(struct bt_hci_s *hci, uint16_t handle, + /* TODO: avoid memcpy'ing */ + + if (len + HCI_ACL_HDR_SIZE > sizeof(hci->acl_buf)) { +- error_report("%s: can't take ACL packets %i bytes long", ++ error_report("%s: can't take ACL packets %zu bytes long", + __func__, len); + return; + } +@@ -2003,7 +2003,7 @@ static inline void bt_hci_lmp_acl_data(struct bt_hci_s *hci, uint16_t handle, + } + + static void bt_hci_lmp_acl_data_slave(struct bt_link_s *btlink, +- const uint8_t *data, int start, int len) ++ const uint8_t *data, int start, size_t len) + { + struct bt_hci_link_s *link = (struct bt_hci_link_s *) btlink; + +@@ -2012,14 +2012,14 @@ static void bt_hci_lmp_acl_data_slave(struct bt_link_s *btlink, + } + + static void bt_hci_lmp_acl_data_host(struct bt_link_s *link, +- const uint8_t *data, int start, int len) ++ const uint8_t *data, int start, size_t len) + { + bt_hci_lmp_acl_data(hci_from_device(link->host), + link->handle, data, start, len); + } + + static void bt_submit_acl(struct HCIInfo *info, +- const uint8_t *data, int length) ++ const uint8_t *data, size_t length) + { + struct bt_hci_s *hci = hci_from_info(info); + uint16_t handle; +@@ -2027,7 +2027,7 @@ static void bt_submit_acl(struct HCIInfo *info, + struct bt_link_s *link; + + if (length < HCI_ACL_HDR_SIZE) { +- error_report("%s: ACL packet too short (%iB)", __func__, length); ++ error_report("%s: ACL packet too short (%zuB)", __func__, length); + 
return; + } + +@@ -2045,7 +2045,7 @@ static void bt_submit_acl(struct HCIInfo *info, + handle &= ~HCI_HANDLE_OFFSET; + + if (datalen > length) { +- error_report("%s: ACL packet too short (%iB < %iB)", ++ error_report("%s: ACL packet too short (%zuB < %iB)", + __func__, length, datalen); + return; + } +@@ -2087,7 +2087,7 @@ static void bt_submit_acl(struct HCIInfo *info, + } + + static void bt_submit_sco(struct HCIInfo *info, +- const uint8_t *data, int length) ++ const uint8_t *data, size_t length) + { + struct bt_hci_s *hci = hci_from_info(info); + uint16_t handle; +@@ -2106,7 +2106,7 @@ static void bt_submit_sco(struct HCIInfo *info, + } + + if (datalen > length) { +- error_report("%s: SCO packet too short (%iB < %iB)", ++ error_report("%s: SCO packet too short (%zuB < %iB)", + __func__, length, datalen); + return; + } +@@ -2127,7 +2127,7 @@ static uint8_t *bt_hci_evt_packet(void *opaque) + return s->evt_buf; + } + +-static void bt_hci_evt_submit(void *opaque, int len) ++static void bt_hci_evt_submit(void *opaque, size_t len) + { + /* TODO: notify upper layer */ + struct bt_hci_s *s = opaque; +diff --git a/hw/bt/hid.c b/hw/bt/hid.c +index 066ca99..fe15434 100644 +--- a/hw/bt/hid.c ++++ b/hw/bt/hid.c +@@ -95,7 +95,7 @@ struct bt_hid_device_s { + int data_type; + int intr_state; + struct { +- int len; ++ size_t len; + uint8_t buffer[1024]; + } dataother, datain, dataout, feature, intrdataout; + enum { +@@ -168,7 +168,7 @@ static void bt_hid_disconnect(struct bt_hid_device_s *s) + } + + static void bt_hid_send_data(struct bt_l2cap_conn_params_s *ch, int type, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + uint8_t *pkt, hdr = (BT_DATA << 4) | type; + int plen; +@@ -189,7 +189,7 @@ static void bt_hid_send_data(struct bt_l2cap_conn_params_s *ch, int type, + } + + static void bt_hid_control_transaction(struct bt_hid_device_s *s, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + uint8_t type, parameter; + int rlen, ret 
= -1; +@@ -361,7 +361,7 @@ static void bt_hid_control_transaction(struct bt_hid_device_s *s, + bt_hid_send_handshake(s, ret); + } + +-static void bt_hid_control_sdu(void *opaque, const uint8_t *data, int len) ++static void bt_hid_control_sdu(void *opaque, const uint8_t *data, size_t len) + { + struct bt_hid_device_s *hid = opaque; + +@@ -387,7 +387,7 @@ static void bt_hid_datain(HIDState *hs) + hid->datain.buffer, hid->datain.len); + } + +-static void bt_hid_interrupt_sdu(void *opaque, const uint8_t *data, int len) ++static void bt_hid_interrupt_sdu(void *opaque, const uint8_t *data, size_t len) + { + struct bt_hid_device_s *hid = opaque; + +diff --git a/hw/bt/l2cap.c b/hw/bt/l2cap.c +index d67098a..2f70a03 100644 +--- a/hw/bt/l2cap.c ++++ b/hw/bt/l2cap.c +@@ -31,10 +31,10 @@ struct l2cap_instance_s { + int role; + + uint8_t frame_in[65535 + L2CAP_HDR_SIZE] __attribute__ ((aligned (4))); +- int frame_in_len; ++ uint32_t frame_in_len; + + uint8_t frame_out[65535 + L2CAP_HDR_SIZE] __attribute__ ((aligned (4))); +- int frame_out_len; ++ uint32_t frame_out_len; + + /* Signalling channel timers. They exist per-request but we can make + * sure we have no more than one outstanding request at any time. 
*/ +@@ -48,7 +48,7 @@ struct l2cap_instance_s { + struct bt_l2cap_conn_params_s params; + + void (*frame_in)(struct l2cap_chan_s *chan, uint16_t cid, +- const l2cap_hdr *hdr, int len); ++ const l2cap_hdr *hdr, size_t len); + int mps; + int min_mtu; + +@@ -67,7 +67,7 @@ struct l2cap_instance_s { + + /* Only flow-controlled, connection-oriented channels */ + uint8_t sdu[65536]; /* TODO: dynamically allocate */ +- int len_cur, len_total; ++ uint32_t len_cur, len_total; + int rexmit; + int monitor_timeout; + QEMUTimer *monitor_timer; +@@ -139,7 +139,7 @@ static const uint16_t l2cap_fcs16_table[256] = { + 0x8201, 0x42c0, 0x4380, 0x8341, 0x4100, 0x81c1, 0x8081, 0x4040, + }; + +-static uint16_t l2cap_fcs16(const uint8_t *message, int len) ++static uint16_t l2cap_fcs16(const uint8_t *message, size_t len) + { + uint16_t fcs = 0x0000; + +@@ -185,7 +185,7 @@ static void l2cap_monitor_timer_update(struct l2cap_chan_s *ch) + } + + static void l2cap_command_reject(struct l2cap_instance_s *l2cap, int id, +- uint16_t reason, const void *data, int plen) ++ uint16_t reason, const void *data, size_t plen) + { + uint8_t *pkt; + l2cap_cmd_hdr *hdr; +@@ -246,7 +246,7 @@ static void l2cap_connection_response(struct l2cap_instance_s *l2cap, + } + + static void l2cap_configuration_request(struct l2cap_instance_s *l2cap, +- int dcid, int flag, const uint8_t *data, int len) ++ int dcid, int flag, const uint8_t *data, size_t len) + { + uint8_t *pkt; + l2cap_cmd_hdr *hdr; +@@ -274,7 +274,7 @@ static void l2cap_configuration_request(struct l2cap_instance_s *l2cap, + } + + static void l2cap_configuration_response(struct l2cap_instance_s *l2cap, +- int scid, int flag, int result, const uint8_t *data, int len) ++ int scid, int flag, int result, const uint8_t *data, size_t len) + { + uint8_t *pkt; + l2cap_cmd_hdr *hdr; +@@ -321,7 +321,7 @@ static void l2cap_disconnection_response(struct l2cap_instance_s *l2cap, + } + + static void l2cap_echo_response(struct l2cap_instance_s *l2cap, +- const uint8_t 
*data, int len) ++ const uint8_t *data, size_t len) + { + uint8_t *pkt; + l2cap_cmd_hdr *hdr; +@@ -342,7 +342,7 @@ static void l2cap_echo_response(struct l2cap_instance_s *l2cap, + } + + static void l2cap_info_response(struct l2cap_instance_s *l2cap, int type, +- int result, const uint8_t *data, int len) ++ int result, const uint8_t *data, size_t len) + { + uint8_t *pkt; + l2cap_cmd_hdr *hdr; +@@ -365,16 +365,18 @@ static void l2cap_info_response(struct l2cap_instance_s *l2cap, int type, + l2cap->signalling_ch.params.sdu_submit(&l2cap->signalling_ch.params); + } + +-static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, int len); ++static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, ++ size_t len); + static void l2cap_bframe_submit(struct bt_l2cap_conn_params_s *parms); + #if 0 +-static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, int len); ++static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, ++ size_t len); + static void l2cap_iframe_submit(struct bt_l2cap_conn_params_s *parm); + #endif + static void l2cap_bframe_in(struct l2cap_chan_s *ch, uint16_t cid, +- const l2cap_hdr *hdr, int len); ++ const l2cap_hdr *hdr, size_t len); + static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid, +- const l2cap_hdr *hdr, int len); ++ const l2cap_hdr *hdr, size_t len); + + static int l2cap_cid_new(struct l2cap_instance_s *l2cap) + { +@@ -498,7 +500,7 @@ static void l2cap_channel_config_req_event(struct l2cap_instance_s *l2cap, + + static int l2cap_channel_config(struct l2cap_instance_s *l2cap, + struct l2cap_chan_s *ch, int flag, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + l2cap_conf_opt *opt; + l2cap_conf_opt_qos *qos; +@@ -683,7 +685,7 @@ static int l2cap_channel_config(struct l2cap_instance_s *l2cap, + } + + static void l2cap_channel_config_req_msg(struct l2cap_instance_s *l2cap, +- int flag, int cid, const uint8_t *data, int len) ++ int flag, int cid, const uint8_t 
*data, size_t len) + { + struct l2cap_chan_s *ch; + +@@ -715,7 +717,7 @@ static void l2cap_channel_config_req_msg(struct l2cap_instance_s *l2cap, + } + + static int l2cap_channel_config_rsp_msg(struct l2cap_instance_s *l2cap, +- int result, int flag, int cid, const uint8_t *data, int len) ++ int result, int flag, int cid, const uint8_t *data, size_t len) + { + struct l2cap_chan_s *ch; + +@@ -783,7 +785,7 @@ static void l2cap_info(struct l2cap_instance_s *l2cap, int type) + } + + static void l2cap_command(struct l2cap_instance_s *l2cap, int code, int id, +- const uint8_t *params, int len) ++ const uint8_t *params, size_t len) + { + int err; + +@@ -938,7 +940,7 @@ static void l2cap_rexmit_enable(struct l2cap_chan_s *ch, int enable) + } + + /* Command frame SDU */ +-static void l2cap_cframe_in(void *opaque, const uint8_t *data, int len) ++static void l2cap_cframe_in(void *opaque, const uint8_t *data, size_t len) + { + struct l2cap_instance_s *l2cap = opaque; + const l2cap_cmd_hdr *hdr; +@@ -966,7 +968,7 @@ static void l2cap_cframe_in(void *opaque, const uint8_t *data, int len) + } + + /* Group frame SDU */ +-static void l2cap_gframe_in(void *opaque, const uint8_t *data, int len) ++static void l2cap_gframe_in(void *opaque, const uint8_t *data, size_t len) + { + } + +@@ -977,7 +979,7 @@ static void l2cap_sframe_in(struct l2cap_chan_s *ch, uint16_t ctrl) + + /* Basic L2CAP mode Information frame */ + static void l2cap_bframe_in(struct l2cap_chan_s *ch, uint16_t cid, +- const l2cap_hdr *hdr, int len) ++ const l2cap_hdr *hdr, size_t len) + { + /* We have a full SDU, no further processing */ + ch->params.sdu_in(ch->params.opaque, hdr->data, len); +@@ -985,7 +987,7 @@ static void l2cap_bframe_in(struct l2cap_chan_s *ch, uint16_t cid, + + /* Flow Control and Retransmission mode frame */ + static void l2cap_iframe_in(struct l2cap_chan_s *ch, uint16_t cid, +- const l2cap_hdr *hdr, int len) ++ const l2cap_hdr *hdr, size_t len) + { + uint16_t fcs = lduw_le_p(hdr->data + len - 2); 
+ +@@ -1076,7 +1078,7 @@ static void l2cap_frame_in(struct l2cap_instance_s *l2cap, + + /* "Recombination" */ + static void l2cap_pdu_in(struct l2cap_instance_s *l2cap, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + const l2cap_hdr *hdr = (void *) l2cap->frame_in; + +@@ -1123,7 +1125,7 @@ static inline void l2cap_pdu_submit(struct l2cap_instance_s *l2cap) + (l2cap->link, l2cap->frame_out, 1, l2cap->frame_out_len); + } + +-static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, int len) ++static uint8_t *l2cap_bframe_out(struct bt_l2cap_conn_params_s *parm, size_t len) + { + struct l2cap_chan_s *chan = (struct l2cap_chan_s *) parm; + +@@ -1146,7 +1148,7 @@ static void l2cap_bframe_submit(struct bt_l2cap_conn_params_s *parms) + + #if 0 + /* Stub: Only used if an emulated device requests outgoing flow control */ +-static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, int len) ++static uint8_t *l2cap_iframe_out(struct bt_l2cap_conn_params_s *parm, size_t len) + { + struct l2cap_chan_s *chan = (struct l2cap_chan_s *) parm; + +@@ -1291,7 +1293,7 @@ static void l2cap_lmp_disconnect_slave(struct bt_link_s *link) + } + + static void l2cap_lmp_acl_data_slave(struct bt_link_s *link, +- const uint8_t *data, int start, int len) ++ const uint8_t *data, int start, size_t len) + { + struct slave_l2cap_instance_s *l2cap = + (struct slave_l2cap_instance_s *) link; +@@ -1304,7 +1306,7 @@ static void l2cap_lmp_acl_data_slave(struct bt_link_s *link, + + /* Stub */ + static void l2cap_lmp_acl_data_host(struct bt_link_s *link, +- const uint8_t *data, int start, int len) ++ const uint8_t *data, int start, size_t len) + { + struct bt_l2cap_device_s *dev = (struct bt_l2cap_device_s *) link->host; + struct l2cap_instance_s *l2cap = +diff --git a/hw/bt/sdp.c b/hw/bt/sdp.c +index 2860d76..6bfb174 100644 +--- a/hw/bt/sdp.c ++++ b/hw/bt/sdp.c +@@ -496,7 +496,7 @@ static ssize_t sdp_svc_search_attr_get(struct bt_l2cap_sdp_state_s *sdp, + 
return end + 2; + } + +-static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, int len) ++static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, size_t len) + { + struct bt_l2cap_sdp_state_s *sdp = opaque; + enum bt_sdp_cmd pdu_id; +@@ -506,7 +506,7 @@ static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, int len) + int rsp_len = 0; + + if (len < 5) { +- error_report("%s: short SDP PDU (%iB).", __func__, len); ++ error_report("%s: short SDP PDU (%zuB).", __func__, len); + return; + } + +@@ -517,7 +517,7 @@ static void bt_l2cap_sdp_sdu_in(void *opaque, const uint8_t *data, int len) + len -= 5; + + if (len != plen) { +- error_report("%s: wrong SDP PDU length (%iB != %iB).", ++ error_report("%s: wrong SDP PDU length (%iB != %zuB).", + __func__, plen, len); + err = SDP_INVALID_PDU_SIZE; + goto respond; +diff --git a/hw/usb/dev-bluetooth.c b/hw/usb/dev-bluetooth.c +index 670ba32..240a901 100644 +--- a/hw/usb/dev-bluetooth.c ++++ b/hw/usb/dev-bluetooth.c +@@ -265,7 +265,7 @@ static void usb_bt_fifo_reset(struct usb_hci_in_fifo_s *fifo) + } + + static void usb_bt_fifo_enqueue(struct usb_hci_in_fifo_s *fifo, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + int off = fifo->dstart + fifo->dlen; + uint8_t *buf; +@@ -274,13 +274,13 @@ static void usb_bt_fifo_enqueue(struct usb_hci_in_fifo_s *fifo, + if (off <= DFIFO_LEN_MASK) { + if (off + len > DFIFO_LEN_MASK + 1 && + (fifo->dsize = off + len) > (DFIFO_LEN_MASK + 1) * 2) { +- fprintf(stderr, "%s: can't alloc %i bytes\n", __func__, len); ++ fprintf(stderr, "%s: can't alloc %zu bytes\n", __func__, len); + exit(-1); + } + buf = fifo->data + off; + } else { + if (fifo->dlen > fifo->dsize) { +- fprintf(stderr, "%s: can't alloc %i bytes\n", __func__, len); ++ fprintf(stderr, "%s: can't alloc %zu bytes\n", __func__, len); + exit(-1); + } + buf = fifo->data + off - fifo->dsize; +@@ -319,7 +319,7 @@ static inline void usb_bt_fifo_dequeue(struct usb_hci_in_fifo_s *fifo, + + 
static inline void usb_bt_fifo_out_enqueue(struct USBBtState *s, + struct usb_hci_out_fifo_s *fifo, +- void (*send)(struct HCIInfo *, const uint8_t *, int), ++ void (*send)(struct HCIInfo *, const uint8_t *, size_t), + int (*complete)(const uint8_t *, int), + USBPacket *p) + { +@@ -478,7 +478,7 @@ static void usb_bt_handle_data(USBDevice *dev, USBPacket *p) + } + + static void usb_bt_out_hci_packet_event(void *opaque, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + struct USBBtState *s = (struct USBBtState *) opaque; + +@@ -489,7 +489,7 @@ static void usb_bt_out_hci_packet_event(void *opaque, + } + + static void usb_bt_out_hci_packet_acl(void *opaque, +- const uint8_t *data, int len) ++ const uint8_t *data, size_t len) + { + struct USBBtState *s = (struct USBBtState *) opaque; + +diff --git a/include/hw/bt.h b/include/hw/bt.h +index b5e11d4..bc362aa 100644 +--- a/include/hw/bt.h ++++ b/include/hw/bt.h +@@ -94,9 +94,9 @@ struct bt_device_s { + void (*lmp_disconnect_master)(struct bt_link_s *link); + void (*lmp_disconnect_slave)(struct bt_link_s *link); + void (*lmp_acl_data)(struct bt_link_s *link, const uint8_t *data, +- int start, int len); ++ int start, size_t len); + void (*lmp_acl_resp)(struct bt_link_s *link, const uint8_t *data, +- int start, int len); ++ int start, size_t len); + void (*lmp_mode_change)(struct bt_link_s *link); + + void (*handle_destroy)(struct bt_device_s *device); +@@ -148,12 +148,12 @@ struct bt_l2cap_device_s { + + struct bt_l2cap_conn_params_s { + /* Input */ +- uint8_t *(*sdu_out)(struct bt_l2cap_conn_params_s *chan, int len); ++ uint8_t *(*sdu_out)(struct bt_l2cap_conn_params_s *chan, size_t len); + void (*sdu_submit)(struct bt_l2cap_conn_params_s *chan); + int remote_mtu; + /* Output */ + void *opaque; +- void (*sdu_in)(void *opaque, const uint8_t *data, int len); ++ void (*sdu_in)(void *opaque, const uint8_t *data, size_t len); + void (*close)(void *opaque); + }; + +diff --git a/include/sysemu/bt.h 
b/include/sysemu/bt.h +index 2fd8c0f..df8fb63 100644 +--- a/include/sysemu/bt.h ++++ b/include/sysemu/bt.h +@@ -5,12 +5,12 @@ + + typedef struct HCIInfo { + int (*bdaddr_set)(struct HCIInfo *hci, const uint8_t *bd_addr); +- void (*cmd_send)(struct HCIInfo *hci, const uint8_t *data, int len); +- void (*sco_send)(struct HCIInfo *hci, const uint8_t *data, int len); +- void (*acl_send)(struct HCIInfo *hci, const uint8_t *data, int len); ++ void (*cmd_send)(struct HCIInfo *hci, const uint8_t *data, size_t len); ++ void (*sco_send)(struct HCIInfo *hci, const uint8_t *data, size_t len); ++ void (*acl_send)(struct HCIInfo *hci, const uint8_t *data, size_t len); + void *opaque; +- void (*evt_recv)(void *opaque, const uint8_t *data, int len); +- void (*acl_recv)(void *opaque, const uint8_t *data, int len); ++ void (*evt_recv)(void *opaque, const uint8_t *data, size_t len); ++ void (*acl_recv)(void *opaque, const uint8_t *data, size_t len); + } HCIInfo; + + /* bt-host.c */ +-- +1.8.3.1 + diff --git a/bugfix-Use-gicr_typer-in-arm_gicv3_icc_reset.patch b/bugfix-Use-gicr_typer-in-arm_gicv3_icc_reset.patch new file mode 100644 index 0000000000000000000000000000000000000000..e714cb10bef351cd6ed6df71d08f3666df879ed5 --- /dev/null +++ b/bugfix-Use-gicr_typer-in-arm_gicv3_icc_reset.patch @@ -0,0 +1,44 @@ +From b9e4a4ff6f3292927adb1463777c86cd4063a6ef Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sat, 18 Apr 2020 12:10:11 +0800 +Subject: [PATCH] bugfix: Use gicr_typer in arm_gicv3_icc_reset + +The KVM_VGIC_ATTR macro expect the second parameter as gicr_typer, +of which high 32bit is constructed by mp_affinity. For most case, +the high 32bit of mp_affinity is zero, so it will always access the +ICC_CTLR_EL1 of CPU0. 
+ +Signed-off-by: Keqian Zhu +--- + hw/intc/arm_gicv3_kvm.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index d9c72f85be..b1e74147ba 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -661,13 +661,11 @@ static void kvm_arm_gicv3_get(GICv3State *s) + + static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) + { +- ARMCPU *cpu; + GICv3State *s; + GICv3CPUState *c; + + c = (GICv3CPUState *)env->gicv3state; + s = c->gic; +- cpu = ARM_CPU(c->cpu); + + c->icc_pmr_el1 = 0; + c->icc_bpr[GICV3_G0] = GIC_MIN_BPR; +@@ -684,7 +682,7 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) + + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, +- KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity), ++ KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer), + &c->icc_ctlr_el1[GICV3_NS], false, &error_abort); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; +-- +2.19.1 diff --git a/build-smt-processor-structure-to-support-smt-topolog.patch b/build-smt-processor-structure-to-support-smt-topolog.patch new file mode 100644 index 0000000000000000000000000000000000000000..ed01d38bb17dd4931a1f23b5a21127a2c5dd0425 --- /dev/null +++ b/build-smt-processor-structure-to-support-smt-topolog.patch @@ -0,0 +1,104 @@ +From af8740502815be450709e88df44ad322da2b071f Mon Sep 17 00:00:00 2001 +From: Henglong Fan +Date: Tue, 18 Aug 2020 21:42:33 +0800 +Subject: [PATCH] build smt processor structure to support smt topology + +if vcpu support smt, create new smt hierarchy according to +Processor Properties Topology Table(PPTT) in acpi spec 6.3. 
+Threads sharing a core must be grouped under a unique Processor +hierarchy node structure for each group of threads + +Signed-off-by: Henglong Fan +--- + hw/acpi/aml-build.c | 40 ++++++++++++++++++++++++++++++++-------- + 1 file changed, 32 insertions(+), 8 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 74e95005..8a3b51c8 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -53,7 +53,7 @@ static void build_append_array(GArray *array, GArray *val) + } + + /* +- * ACPI 6.2 Processor Properties Topology Table (PPTT) ++ * ACPI 6.3 Processor Properties Topology Table (PPTT) + */ + #ifdef __aarch64__ + static void build_cache_head(GArray *tbl, uint32_t next_level) +@@ -126,7 +126,7 @@ static void build_arm_socket_hierarchy(GArray *tbl, + build_append_int_noprefix(tbl, offset, 4); + } + +-static void build_arm_cpu_hierarchy(GArray *tbl, ++static void build_arm_core_hierarchy(GArray *tbl, + struct offset_status *offset, uint32_t id) + { + if (!offset) { +@@ -144,18 +144,35 @@ static void build_arm_cpu_hierarchy(GArray *tbl, + build_append_int_noprefix(tbl, offset->l2_offset, 4); + } + ++static void build_arm_smt_hierarchy(GArray *tbl, ++ uint32_t offset, uint32_t id) ++{ ++ if (!offset) { ++ return; ++ } ++ build_append_byte(tbl, 0); /* Type 0 - processor */ ++ build_append_byte(tbl, 20); /* Length, add private resources */ ++ build_append_int_noprefix(tbl, 0, 2); /* Reserved */ ++ build_append_int_noprefix(tbl, 14, 4); /* Valid id*/ ++ build_append_int_noprefix(tbl, offset, 4); ++ build_append_int_noprefix(tbl, id, 4); ++ build_append_int_noprefix(tbl, 0, 4); /* Num private resources */ ++} ++ + void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) + { + int pptt_start = table_data->len; +- int uid = 0, cpus = 0, socket; ++ int uid = 0, socket; ++ uint32_t core_offset; + struct offset_status offset; + const MachineState *ms = MACHINE(qdev_get_machine()); + unsigned int smp_cores = ms->smp.cores; ++ unsigned 
int smp_sockets = ms->smp.cpus / (smp_cores * ms->smp.threads); + + acpi_data_push(table_data, sizeof(AcpiTableHeader)); + +- for (socket = 0; cpus < possible_cpus; socket++) { +- int core; ++ for (socket = 0; socket < smp_sockets; socket++) { ++ int core,thread; + uint32_t l3_offset = table_data->len - pptt_start; + build_cache_hierarchy(table_data, 0, ARM_L3_CACHE); + +@@ -169,14 +186,21 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) + build_cache_hierarchy(table_data, offset.l2_offset, ARM_L1D_CACHE); + offset.l1i_offset = table_data->len - pptt_start; + build_cache_hierarchy(table_data, offset.l2_offset, ARM_L1I_CACHE); +- build_arm_cpu_hierarchy(table_data, &offset, uid++); +- cpus++; ++ core_offset = table_data->len - pptt_start; ++ if (ms->smp.threads <= 1) { ++ build_arm_core_hierarchy(table_data, &offset, uid++); ++ } else { ++ build_arm_core_hierarchy(table_data, &offset, core); ++ for (thread = 0; thread < ms->smp.threads; thread++) { ++ build_arm_smt_hierarchy(table_data, core_offset, uid++); ++ } ++ } + } + } + + build_header(linker, table_data, + (void *)(table_data->data + pptt_start), "PPTT", +- table_data->len - pptt_start, 1, NULL, NULL); ++ table_data->len - pptt_start, 2, NULL, NULL); + } + + #else +-- +2.23.0 + diff --git a/char-fix-use-after-free-with-dup-chardev-reconnect.patch b/char-fix-use-after-free-with-dup-chardev-reconnect.patch new file mode 100644 index 0000000000000000000000000000000000000000..fd81015a18beced443caef903d0ec1f2a1fd8850 --- /dev/null +++ b/char-fix-use-after-free-with-dup-chardev-reconnect.patch @@ -0,0 +1,126 @@ +From 902a8192600ff81681a162509e23bf95619d1f04 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 20 Apr 2020 13:20:12 +0200 +Subject: [PATCH] char: fix use-after-free with dup chardev & reconnect +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +With a reconnect socket, qemu_char_open() will start a 
background +thread. It should keep a reference on the chardev. + +Fixes invalid read: +READ of size 8 at 0x6040000ac858 thread T7 + #0 0x5555598d37b8 in unix_connect_saddr /home/elmarco/src/qq/util/qemu-sockets.c:954 + #1 0x5555598d4751 in socket_connect /home/elmarco/src/qq/util/qemu-sockets.c:1109 + #2 0x555559707c34 in qio_channel_socket_connect_sync /home/elmarco/src/qq/io/channel-socket.c:145 + #3 0x5555596adebb in tcp_chr_connect_client_task /home/elmarco/src/qq/chardev/char-socket.c:1104 + #4 0x555559723d55 in qio_task_thread_worker /home/elmarco/src/qq/io/task.c:123 + #5 0x5555598a6731 in qemu_thread_start /home/elmarco/src/qq/util/qemu-thread-posix.c:519 + #6 0x7ffff40d4431 in start_thread (/lib64/libpthread.so.0+0x9431) + #7 0x7ffff40029d2 in __clone (/lib64/libc.so.6+0x1019d2) + +Signed-off-by: Marc-André Lureau +Reviewed-by: Daniel P. Berrangé +Message-Id: <20200420112012.567284-1-marcandre.lureau@redhat.com> +Signed-off-by: Zhenyu Ye +--- + chardev/char-socket.c | 3 ++- + tests/test-char.c | 53 ++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 54 insertions(+), 2 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 7ca5d97a..701b62f9 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -1118,7 +1118,8 @@ static void tcp_chr_connect_client_async(Chardev *chr) + */ + s->connect_task = qio_task_new(OBJECT(sioc), + qemu_chr_socket_connected, +- chr, NULL); ++ object_ref(OBJECT(chr)), ++ (GDestroyNotify)object_unref); + qio_task_run_in_thread(s->connect_task, + tcp_chr_connect_client_task, + s->addr, +diff --git a/tests/test-char.c b/tests/test-char.c +index f9440cdc..0e4069fb 100644 +--- a/tests/test-char.c ++++ b/tests/test-char.c +@@ -871,6 +871,53 @@ typedef struct { + } CharSocketClientTestConfig; + + ++static void char_socket_client_dupid_test(gconstpointer opaque) ++{ ++ const CharSocketClientTestConfig *config = opaque; ++ QIOChannelSocket *ioc; ++ char *optstr; ++ Chardev *chr1, *chr2; ++ 
SocketAddress *addr; ++ QemuOpts *opts; ++ Error *local_err = NULL; ++ ++ /* ++ * Setup a listener socket and determine get its address ++ * so we know the TCP port for the client later ++ */ ++ ioc = qio_channel_socket_new(); ++ g_assert_nonnull(ioc); ++ qio_channel_socket_listen_sync(ioc, config->addr, &error_abort); ++ addr = qio_channel_socket_get_local_address(ioc, &error_abort); ++ g_assert_nonnull(addr); ++ ++ /* ++ * Populate the chardev address based on what the server ++ * is actually listening on ++ */ ++ optstr = char_socket_addr_to_opt_str(addr, ++ config->fd_pass, ++ config->reconnect, ++ false); ++ ++ opts = qemu_opts_parse_noisily(qemu_find_opts("chardev"), ++ optstr, true); ++ g_assert_nonnull(opts); ++ chr1 = qemu_chr_new_from_opts(opts, NULL, &error_abort); ++ g_assert_nonnull(chr1); ++ ++ chr2 = qemu_chr_new_from_opts(opts, NULL, &local_err); ++ g_assert_null(chr2); ++ error_free_or_abort(&local_err); ++ ++ object_unref(OBJECT(ioc)); ++ qemu_opts_del(opts); ++ object_unparent(OBJECT(chr1)); ++ qapi_free_SocketAddress(addr); ++ g_free(optstr); ++} ++ ++ + static void char_socket_client_test(gconstpointer opaque) + { + const CharSocketClientTestConfig *config = opaque; +@@ -1425,6 +1472,8 @@ int main(int argc, char **argv) + { addr, NULL, false, true }; \ + CharSocketClientTestConfig client6 ## name = \ + { addr, NULL, true, true }; \ ++ CharSocketClientTestConfig client7 ## name = \ ++ { addr, ",reconnect=1", false, false }; \ + g_test_add_data_func("/char/socket/client/mainloop/" # name, \ + &client1 ##name, char_socket_client_test); \ + g_test_add_data_func("/char/socket/client/wait-conn/" # name, \ +@@ -1436,7 +1485,9 @@ int main(int argc, char **argv) + g_test_add_data_func("/char/socket/client/mainloop-fdpass/" # name, \ + &client5 ##name, char_socket_client_test); \ + g_test_add_data_func("/char/socket/client/wait-conn-fdpass/" # name, \ +- &client6 ##name, char_socket_client_test) ++ &client6 ##name, char_socket_client_test); \ ++ 
g_test_add_data_func("/char/socket/client/dupid-reconnect/" # name, \ ++ &client7 ##name, char_socket_client_dupid_test) + + SOCKET_SERVER_TEST(tcp, &tcpaddr); + SOCKET_CLIENT_TEST(tcp, &tcpaddr); +-- +2.22.0.windows.1 + diff --git a/chardev-tcp-Fix-error-message-double-free-error.patch b/chardev-tcp-Fix-error-message-double-free-error.patch new file mode 100644 index 0000000000000000000000000000000000000000..175ddfe2dea85111016d162aa0cd95d79a49a492 --- /dev/null +++ b/chardev-tcp-Fix-error-message-double-free-error.patch @@ -0,0 +1,43 @@ +From 4488ab4700d344b049ddef808a64eda4b5867902 Mon Sep 17 00:00:00 2001 +From: lichun +Date: Mon, 22 Jun 2020 05:30:17 +0800 +Subject: [PATCH 06/11] chardev/tcp: Fix error message double free error + +Errors are already freed by error_report_err, so we only need to call +error_free when that function is not called. + +Cc: qemu-stable@nongnu.org +Signed-off-by: lichun +Message-Id: <20200621213017.17978-1-lichun@ruijie.com.cn> +Reviewed-by: Markus Armbruster +[Commit message improved, cc: qemu-stable] +Signed-off-by: Markus Armbruster +Signed-off-by: BiaoXiang Ye +--- + chardev/char-socket.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 701b62f9..9b06c8aa 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -141,6 +141,8 @@ static void check_report_connect_error(Chardev *chr, + error_report("Unable to connect character device %s: %s", + chr->label, error_get_pretty(err)); + s->connect_err_reported = true; ++ } else { ++ error_free(err); + } + qemu_chr_socket_restart_timer(chr); + } +@@ -1074,7 +1076,6 @@ static void qemu_chr_socket_connected(QIOTask *task, void *opaque) + if (qio_task_propagate_error(task, &err)) { + tcp_chr_change_state(s, TCP_CHARDEV_STATE_DISCONNECTED); + check_report_connect_error(chr, err); +- error_free(err); + goto cleanup; + } + +-- +2.27.0.dirty + diff --git a/colo-compare-Fix-memory-leak-in-packet_enqueue.patch 
b/colo-compare-Fix-memory-leak-in-packet_enqueue.patch new file mode 100644 index 0000000000000000000000000000000000000000..ca5e43c49a6ad18fa7c6d204c1eabfac7ed6ddd5 --- /dev/null +++ b/colo-compare-Fix-memory-leak-in-packet_enqueue.patch @@ -0,0 +1,90 @@ +From 19afb1431bd730a1e4e09e3c0835c35572517268 Mon Sep 17 00:00:00 2001 +From: Derek Su +Date: Fri, 22 May 2020 15:53:57 +0800 +Subject: [PATCH 07/11] colo-compare: Fix memory leak in packet_enqueue() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The patch is to fix the "pkt" memory leak in packet_enqueue(). +The allocated "pkt" needs to be freed if the colo compare +primary or secondary queue is too big. + +Replace the error_report of full queue with a trace event. + +Signed-off-by: Derek Su +Reviewed-by: Zhang Chen +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Zhang Chen +Signed-off-by: Jason Wang +Signed-off-by: BiaoXiang Ye +--- + net/colo-compare.c | 23 +++++++++++++++-------- + net/trace-events | 1 + + 2 files changed, 16 insertions(+), 8 deletions(-) + +diff --git a/net/colo-compare.c b/net/colo-compare.c +index 7ee17f2c..3168407e 100644 +--- a/net/colo-compare.c ++++ b/net/colo-compare.c +@@ -120,6 +120,10 @@ enum { + SECONDARY_IN, + }; + ++static const char *colo_mode[] = { ++ [PRIMARY_IN] = "primary", ++ [SECONDARY_IN] = "secondary", ++}; + + static int compare_chr_send(CompareState *s, + const uint8_t *buf, +@@ -215,6 +219,7 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) + ConnectionKey key; + Packet *pkt = NULL; + Connection *conn; ++ int ret; + + if (mode == PRIMARY_IN) { + pkt = packet_new(s->pri_rs.buf, +@@ -243,16 +248,18 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) + } + + if (mode == PRIMARY_IN) { +- if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) { +- error_report("colo compare primary queue size too big," +- "drop packet"); +- } ++ ret = 
colo_insert_packet(&conn->primary_list, pkt, &conn->pack); + } else { +- if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) { +- error_report("colo compare secondary queue size too big," +- "drop packet"); +- } ++ ret = colo_insert_packet(&conn->secondary_list, pkt, &conn->sack); + } ++ ++ if (!ret) { ++ trace_colo_compare_drop_packet(colo_mode[mode], ++ "queue size too big, drop packet"); ++ packet_destroy(pkt, NULL); ++ pkt = NULL; ++ } ++ + *con = conn; + + return 0; +diff --git a/net/trace-events b/net/trace-events +index ac570564..a9995387 100644 +--- a/net/trace-events ++++ b/net/trace-events +@@ -12,6 +12,7 @@ colo_proxy_main(const char *chr) ": %s" + + # colo-compare.c + colo_compare_main(const char *chr) ": %s" ++colo_compare_drop_packet(const char *queue, const char *chr) ": %s: %s" + colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d" + colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" + colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" +-- +2.27.0.dirty + diff --git a/configure-Enable-test-and-libs-for-zstd.patch b/configure-Enable-test-and-libs-for-zstd.patch new file mode 100644 index 0000000000000000000000000000000000000000..bf900cf343d4d77c997d203ffcd12a8dc6c434fb --- /dev/null +++ b/configure-Enable-test-and-libs-for-zstd.patch @@ -0,0 +1,121 @@ +From 5a79ccd388ee09dc1db93d26791d1e4a6b2ced47 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 3 Feb 2021 17:33:44 +0800 +Subject: [PATCH] configure: Enable test and libs for zstd + +configure: Enable test and libs for zstd +Add it to several build systems to make testing good. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert + +Signed-off-by: Jiajie Li +--- + .gitlab-ci.yml | 1 + + .travis.yml | 1 + + configure | 30 ++++++++++++++++++++++++++++++ + 3 files changed, 32 insertions(+) + +diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml +index c63bf2f822..3d9b7f9262 100644 +--- a/.gitlab-ci.yml ++++ b/.gitlab-ci.yml +@@ -16,6 +16,7 @@ build-system2: + script: + - apt-get install -y -qq libsdl2-dev libgcrypt-dev libbrlapi-dev libaio-dev + libfdt-dev liblzo2-dev librdmacm-dev libibverbs-dev libibumad-dev ++ libzstd-dev + - ./configure --enable-werror --target-list="tricore-softmmu unicore32-softmmu + microblaze-softmmu mips-softmmu riscv32-softmmu s390x-softmmu sh4-softmmu + sparc64-softmmu x86_64-softmmu xtensa-softmmu nios2-softmmu or1k-softmmu" +diff --git a/.travis.yml b/.travis.yml +index caf0a1f8fa..f3fe04fba9 100644 +--- a/.travis.yml ++++ b/.travis.yml +@@ -35,6 +35,7 @@ addons: + - liburcu-dev + - libusb-1.0-0-dev + - libvte-2.91-dev ++ - libzstd-dev + - sparse + - uuid-dev + - gcovr +diff --git a/configure b/configure +index 714e7fb6a1..577533e9ed 100755 +--- a/configure ++++ b/configure +@@ -446,6 +446,7 @@ lzo="" + snappy="" + bzip2="" + lzfse="" ++zstd="" + guest_agent="" + guest_agent_with_vss="no" + guest_agent_ntddscsi="no" +@@ -1358,6 +1359,10 @@ for opt do + ;; + --disable-lzfse) lzfse="no" + ;; ++ --disable-zstd) zstd="no" ++ ;; ++ --enable-zstd) zstd="yes" ++ ;; + --enable-guest-agent) guest_agent="yes" + ;; + --disable-guest-agent) guest_agent="no" +@@ -1812,6 +1817,8 @@ disabled with --disable-FEATURE, default is enabled if available: + (for reading bzip2-compressed dmg images) + lzfse support of lzfse compression library + (for reading lzfse-compressed dmg images) ++ zstd support for zstd compression library ++ (for migration compression) + seccomp seccomp support + coroutine-pool coroutine freelist (better performance) + glusterfs GlusterFS backend +@@ -2407,6 +2414,24 @@ EOF + fi + fi + ++########################################## ++# zstd check ++ 
++if test "$zstd" != "no" ; then ++ if $pkg_config --exist libzstd ; then ++ zstd_cflags="$($pkg_config --cflags libzstd)" ++ zstd_libs="$($pkg_config --libs libzstd)" ++ LIBS="$zstd_libs $LIBS" ++ QEMU_CFLAGS="$QEMU_CFLAGS $zstd_cflags" ++ zstd="yes" ++ else ++ if test "$zstd" = "yes" ; then ++ feature_not_found "libzstd" "Install libzstd devel" ++ fi ++ zstd="no" ++ fi ++fi ++ + ########################################## + # libseccomp check + +@@ -6460,6 +6485,7 @@ echo "lzo support $lzo" + echo "snappy support $snappy" + echo "bzip2 support $bzip2" + echo "lzfse support $lzfse" ++echo "zstd support $zstd" + echo "NUMA host support $numa" + echo "libxml2 $libxml2" + echo "tcmalloc support $tcmalloc" +@@ -7024,6 +7050,10 @@ if test "$lzfse" = "yes" ; then + echo "LZFSE_LIBS=-llzfse" >> $config_host_mak + fi + ++if test "$zstd" = "yes" ; then ++ echo "CONFIG_ZSTD=y" >> $config_host_mak ++fi ++ + if test "$libiscsi" = "yes" ; then + echo "CONFIG_LIBISCSI=m" >> $config_host_mak + echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak +-- +2.27.0 + diff --git a/contrib-libvhost-user-Protect-slave-fd-with-mutex.patch b/contrib-libvhost-user-Protect-slave-fd-with-mutex.patch new file mode 100644 index 0000000000000000000000000000000000000000..44fc4283ca652bb9743c186b2127f073e32b5043 --- /dev/null +++ b/contrib-libvhost-user-Protect-slave-fd-with-mutex.patch @@ -0,0 +1,121 @@ +From f076af734a5964c3e48b2d223130f855b86f40e5 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Fri, 1 Mar 2019 11:18:30 +0000 +Subject: [PATCH] contrib/libvhost-user: Protect slave fd with mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In future patches we'll be performing commands on the slave-fd driven +by commands on queues, since those queues will be driven by individual +threads we need to make sure they don't attempt to use the slave-fd +for multiple commands in parallel. + +Signed-off-by: Dr. 
David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +--- + contrib/libvhost-user/libvhost-user.c | 24 ++++++++++++++++++++---- + contrib/libvhost-user/libvhost-user.h | 3 +++ + 2 files changed, 23 insertions(+), 4 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index cb5f5770e4..fb75837032 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -387,26 +387,37 @@ vu_send_reply(VuDev *dev, int conn_fd, VhostUserMsg *vmsg) + return vu_message_write(dev, conn_fd, vmsg); + } + ++/* ++ * Processes a reply on the slave channel. ++ * Entered with slave_mutex held and releases it before exit. ++ * Returns true on success. ++ */ + static bool + vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg) + { + VhostUserMsg msg_reply; ++ bool result = false; + + if ((vmsg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { +- return true; ++ result = true; ++ goto out; + } + + if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) { +- return false; ++ goto out; + } + + if (msg_reply.request != vmsg->request) { + DPRINT("Received unexpected msg type. Expected %d received %d", + vmsg->request, msg_reply.request); +- return false; ++ goto out; + } + +- return msg_reply.payload.u64 == 0; ++ result = msg_reply.payload.u64 == 0; ++ ++out: ++ pthread_mutex_unlock(&dev->slave_mutex); ++ return result; + } + + /* Kick the log_call_fd if required. 
*/ +@@ -1102,10 +1113,13 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + return false; + } + ++ pthread_mutex_lock(&dev->slave_mutex); + if (!vu_message_write(dev, dev->slave_fd, &vmsg)) { ++ pthread_mutex_unlock(&dev->slave_mutex); + return false; + } + ++ /* Also unlocks the slave_mutex */ + return vu_process_message_reply(dev, &vmsg); + } + +@@ -1625,6 +1639,7 @@ vu_deinit(VuDev *dev) + close(dev->slave_fd); + dev->slave_fd = -1; + } ++ pthread_mutex_destroy(&dev->slave_mutex); + + if (dev->sock != -1) { + close(dev->sock); +@@ -1660,6 +1675,7 @@ vu_init(VuDev *dev, + dev->remove_watch = remove_watch; + dev->iface = iface; + dev->log_call_fd = -1; ++ pthread_mutex_init(&dev->slave_mutex, NULL); + dev->slave_fd = -1; + dev->max_queues = max_queues; + +diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h +index 46b600799b..1844b6f8d4 100644 +--- a/contrib/libvhost-user/libvhost-user.h ++++ b/contrib/libvhost-user/libvhost-user.h +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include "standard-headers/linux/virtio_ring.h" + + /* Based on qemu/hw/virtio/vhost-user.c */ +@@ -355,6 +356,8 @@ struct VuDev { + VuVirtq *vq; + VuDevInflightInfo inflight_info; + int log_call_fd; ++ /* Must be held while using slave_fd */ ++ pthread_mutex_t slave_mutex; + int slave_fd; + uint64_t log_size; + uint8_t *log_table; +-- +2.27.0 + diff --git a/coroutine-Add-qemu_co_mutex_assert_locked.patch b/coroutine-Add-qemu_co_mutex_assert_locked.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb1f2589f3edd987f0311288d049951c726ddeb8 --- /dev/null +++ b/coroutine-Add-qemu_co_mutex_assert_locked.patch @@ -0,0 +1,50 @@ +From e9bb3d942e268a19e03fc5d404586d2ed1564282 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 24 Oct 2019 16:26:57 +0200 +Subject: [PATCH] coroutine: Add qemu_co_mutex_assert_locked() + +Some functions require that the caller holds a certain CoMutex for them +to 
operate correctly. Add a function so that they can assert the lock is +really held. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Tested-by: Michael Weiser +Reviewed-by: Michael Weiser +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Denis V. Lunev +Reviewed-by: Max Reitz +(cherry picked from commit 944f3d5dd216fcd8cb007eddd4f82dced0a15b3d) +Signed-off-by: Michael Roth +--- + include/qemu/coroutine.h | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h +index 9801e7f5a4..f4843b5f59 100644 +--- a/include/qemu/coroutine.h ++++ b/include/qemu/coroutine.h +@@ -167,6 +167,21 @@ void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex); + */ + void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex); + ++/** ++ * Assert that the current coroutine holds @mutex. ++ */ ++static inline coroutine_fn void qemu_co_mutex_assert_locked(CoMutex *mutex) ++{ ++ /* ++ * mutex->holder doesn't need any synchronisation if the assertion holds ++ * true because the mutex protects it. If it doesn't hold true, we still ++ * don't mind if another thread takes or releases mutex behind our back, ++ * because the condition will be false no matter whether we read NULL or ++ * the pointer for any other coroutine. ++ */ ++ assert(atomic_read(&mutex->locked) && ++ mutex->holder == qemu_coroutine_self()); ++} + + /** + * CoQueues are a mechanism to queue coroutines in order to continue executing +-- +2.23.0 diff --git a/cpu-add-Kunpeng-920-cpu-support.patch b/cpu-add-Kunpeng-920-cpu-support.patch index 149a1a123e8deae9de1e634aa025618518c7f56f..74e27645ac18a07c00a04283f428bec515cf259e 100644 --- a/cpu-add-Kunpeng-920-cpu-support.patch +++ b/cpu-add-Kunpeng-920-cpu-support.patch @@ -3,6 +3,8 @@ From: Xu Yandong Date: Wed, 28 Aug 2019 01:36:21 -0400 Subject: [PATCH] cpu: add Kunpeng-920 cpu support +Add the Kunpeng-920 CPU model. 
+ Signed-off-by: Xu Yandong --- hw/arm/virt.c | 1 + @@ -28,7 +30,7 @@ index 228906f2..5581d5e1 100644 @@ -258,6 +258,26 @@ static void aarch64_a72_initfn(Object *obj) define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); } - + +static void aarch64_kunpeng_920_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); @@ -60,6 +62,5 @@ index 228906f2..5581d5e1 100644 { .name = "max", .initfn = aarch64_max_initfn }, { .name = NULL } }; --- +-- 2.19.1 - diff --git a/cpu-parse-feature-to-avoid-failure.patch b/cpu-parse-feature-to-avoid-failure.patch index a241a5f140c371f792a75839c53fb5b8779f7060..78178bfa3dd2a9dd1413dd3c12bbc8e6ed6d2869 100644 --- a/cpu-parse-feature-to-avoid-failure.patch +++ b/cpu-parse-feature-to-avoid-failure.patch @@ -1,8 +1,11 @@ From ba1ca232cfa2ca273c610beda40bee2143f11964 Mon Sep 17 00:00:00 2001 -From: rpm-build +From: Xu Yandong Date: Tue, 3 Sep 2019 16:27:39 +0800 Subject: [PATCH] cpu: parse +/- feature to avoid failure +To avoid cpu feature parse failure, +/- feature is added. + +Signed-off-by: Xu Yandong --- target/arm/cpu64.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/cris-do-not-leak-struct-cris_disasm_data.patch b/cris-do-not-leak-struct-cris_disasm_data.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa7623fe1878eca815805e853d64ff9b2d8a88a3 --- /dev/null +++ b/cris-do-not-leak-struct-cris_disasm_data.patch @@ -0,0 +1,139 @@ +From d0586065e67b5df2611f4cf61eb791d48b78ff77 Mon Sep 17 00:00:00 2001 +From: lizhengui +Date: Wed, 9 Sep 2020 14:42:59 +0800 +Subject: [PATCH] cris: do not leak struct cris_disasm_data + +Use a stack-allocated struct to avoid a memory leak. 
+ +Signed-off-by: Paolo Bonzini +--- + disas/cris.c | 65 ++++++++++++++++++++++++++++------------------------ + 1 file changed, 35 insertions(+), 30 deletions(-) + +diff --git a/disas/cris.c b/disas/cris.c +index 2f43c9b2..f3ff44ba 100644 +--- a/disas/cris.c ++++ b/disas/cris.c +@@ -1294,24 +1294,17 @@ static int cris_constraint + /* Parse disassembler options and store state in info. FIXME: For the + time being, we abuse static variables. */ + +-static bfd_boolean +-cris_parse_disassembler_options (disassemble_info *info, ++static void ++cris_parse_disassembler_options (struct cris_disasm_data *disdata, ++ char *disassembler_options, + enum cris_disass_family distype) + { +- struct cris_disasm_data *disdata; +- +- info->private_data = calloc (1, sizeof (struct cris_disasm_data)); +- disdata = (struct cris_disasm_data *) info->private_data; +- if (disdata == NULL) +- return false; +- + /* Default true. */ + disdata->trace_case +- = (info->disassembler_options == NULL +- || (strcmp (info->disassembler_options, "nocase") != 0)); ++ = (disassembler_options == NULL ++ || (strcmp (disassembler_options, "nocase") != 0)); + + disdata->distype = distype; +- return true; + } + + static const struct cris_spec_reg * +@@ -2736,9 +2729,11 @@ static int + print_insn_cris_with_register_prefix (bfd_vma vma, + disassemble_info *info) + { +- if (info->private_data == NULL +- && !cris_parse_disassembler_options (info, cris_dis_v0_v10)) +- return -1; ++ struct cris_disasm_data disdata; ++ info->private_data = &disdata; ++ cris_parse_disassembler_options (&disdata, info->disassembler_options, ++ cris_dis_v0_v10); ++ + return print_insn_cris_generic (vma, info, true); + } + /* Disassemble, prefixing register names with `$'. CRIS v32. 
*/ +@@ -2747,9 +2742,11 @@ static int + print_insn_crisv32_with_register_prefix (bfd_vma vma, + disassemble_info *info) + { +- if (info->private_data == NULL +- && !cris_parse_disassembler_options (info, cris_dis_v32)) +- return -1; ++ struct cris_disasm_data disdata; ++ info->private_data = &disdata; ++ cris_parse_disassembler_options (&disdata, info->disassembler_options, ++ cris_dis_v32); ++ + return print_insn_cris_generic (vma, info, true); + } + +@@ -2761,9 +2758,11 @@ static int + print_insn_crisv10_v32_with_register_prefix (bfd_vma vma, + disassemble_info *info) + { +- if (info->private_data == NULL +- && !cris_parse_disassembler_options (info, cris_dis_common_v10_v32)) +- return -1; ++ struct cris_disasm_data disdata; ++ info->private_data = &disdata; ++ cris_parse_disassembler_options (&disdata, info->disassembler_options, ++ cris_dis_common_v10_v32); ++ + return print_insn_cris_generic (vma, info, true); + } + +@@ -2773,9 +2772,11 @@ static int + print_insn_cris_without_register_prefix (bfd_vma vma, + disassemble_info *info) + { +- if (info->private_data == NULL +- && !cris_parse_disassembler_options (info, cris_dis_v0_v10)) +- return -1; ++ struct cris_disasm_data disdata; ++ info->private_data = &disdata; ++ cris_parse_disassembler_options (&disdata, info->disassembler_options, ++ cris_dis_v0_v10); ++ + return print_insn_cris_generic (vma, info, false); + } + +@@ -2785,9 +2786,11 @@ static int + print_insn_crisv32_without_register_prefix (bfd_vma vma, + disassemble_info *info) + { +- if (info->private_data == NULL +- && !cris_parse_disassembler_options (info, cris_dis_v32)) +- return -1; ++ struct cris_disasm_data disdata; ++ info->private_data = &disdata; ++ cris_parse_disassembler_options (&disdata, info->disassembler_options, ++ cris_dis_v32); ++ + return print_insn_cris_generic (vma, info, false); + } + +@@ -2798,9 +2801,11 @@ static int + print_insn_crisv10_v32_without_register_prefix (bfd_vma vma, + disassemble_info *info) + { +- if 
(info->private_data == NULL +- && !cris_parse_disassembler_options (info, cris_dis_common_v10_v32)) +- return -1; ++ struct cris_disasm_data disdata; ++ info->private_data = &disdata; ++ cris_parse_disassembler_options (&disdata, info->disassembler_options, ++ cris_dis_common_v10_v32); ++ + return print_insn_cris_generic (vma, info, false); + } + #endif +-- +2.19.1 + diff --git a/crypto-add-support-for-gcrypt-s-native-XTS-impl.patch b/crypto-add-support-for-gcrypt-s-native-XTS-impl.patch new file mode 100644 index 0000000000000000000000000000000000000000..d204f017b830d15bb9609570e39fb11e34676203 --- /dev/null +++ b/crypto-add-support-for-gcrypt-s-native-XTS-impl.patch @@ -0,0 +1,346 @@ +From 84352558eec97cfb0e4517fbb53d75d9f15cbcf9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 14 Oct 2019 17:28:27 +0100 +Subject: [PATCH] crypto: add support for gcrypt's native XTS impl +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Libgcrypt 1.8.0 added support for the XTS mode. Use this because long +term we wish to delete QEMU's XTS impl to avoid carrying private crypto +algorithm impls. + +As an added benefit, using this improves performance from 531 MB/sec to +670 MB/sec, since we are avoiding several layers of function call +indirection. + +This is even more noticeable with the gcrypt builds in Fedora or RHEL-8 +which have a non-upstream patch for FIPS mode which does mutex locking. +This is catastrophic for encryption performance with small block sizes, +meaning this patch improves encryption from 240 MB/sec to 670 MB/sec. + +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefano Garzarella +Signed-off-by: Daniel P. 
Berrangé +--- + configure | 22 ++++++++++ + crypto/Makefile.objs | 2 +- + crypto/cipher-gcrypt.c | 97 ++++++++++++++++++++++++++++-------------- + tests/Makefile.include | 2 +- + 4 files changed, 88 insertions(+), 35 deletions(-) + +diff --git a/configure b/configure +index 5dcaac3b95..a88cdd5109 100755 +--- a/configure ++++ b/configure +@@ -476,6 +476,8 @@ nettle="" + nettle_xts="no" + gcrypt="" + gcrypt_hmac="no" ++gcrypt_xts="no" ++qemu_private_xts="yes" + auth_pam="" + vte="" + virglrenderer="" +@@ -2974,6 +2976,18 @@ EOF + if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then + gcrypt_hmac=yes + fi ++ cat > $TMPC << EOF ++#include ++int main(void) { ++ gcry_cipher_hd_t handle; ++ gcry_cipher_open(&handle, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_XTS, 0); ++ return 0; ++} ++EOF ++ if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then ++ gcrypt_xts=yes ++ qemu_private_xts=no ++ fi + elif test "$gcrypt" = "yes"; then + feature_not_found "gcrypt" "Install gcrypt devel >= 1.5.0" + else +@@ -6404,6 +6418,11 @@ echo "VTE support $vte $(echo_version $vte $vteversion)" + echo "TLS priority $tls_priority" + echo "GNUTLS support $gnutls" + echo "libgcrypt $gcrypt" ++if test "$gcrypt" = "yes" ++then ++ echo " hmac $gcrypt_hmac" ++ echo " XTS $gcrypt_xts" ++fi + echo "nettle $nettle $(echo_version $nettle $nettle_version)" + if test "$nettle" = "yes" + then +@@ -6889,6 +6908,9 @@ if test "$nettle" = "yes" ; then + echo "CONFIG_NETTLE=y" >> $config_host_mak + echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak + fi ++if test "$qemu_private_xts" = "yes" ; then ++ echo "CONFIG_QEMU_PRIVATE_XTS=y" >> $config_host_mak ++fi + if test "$tasn1" = "yes" ; then + echo "CONFIG_TASN1=y" >> $config_host_mak + fi +diff --git a/crypto/Makefile.objs b/crypto/Makefile.objs +index 7fe2fa9da2..cdb01f9de9 100644 +--- a/crypto/Makefile.objs ++++ b/crypto/Makefile.objs +@@ -31,7 +31,7 @@ crypto-obj-y += ivgen-essiv.o + crypto-obj-y += ivgen-plain.o + crypto-obj-y += 
ivgen-plain64.o + crypto-obj-y += afsplit.o +-crypto-obj-y += xts.o ++crypto-obj-$(CONFIG_QEMU_PRIVATE_XTS) += xts.o + crypto-obj-y += block.o + crypto-obj-y += block-qcow.o + crypto-obj-y += block-luks.o +diff --git a/crypto/cipher-gcrypt.c b/crypto/cipher-gcrypt.c +index 5cece9b244..2864099527 100644 +--- a/crypto/cipher-gcrypt.c ++++ b/crypto/cipher-gcrypt.c +@@ -19,7 +19,9 @@ + */ + + #include "qemu/osdep.h" ++#ifdef CONFIG_QEMU_PRIVATE_XTS + #include "crypto/xts.h" ++#endif + #include "cipherpriv.h" + + #include +@@ -59,10 +61,12 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + typedef struct QCryptoCipherGcrypt QCryptoCipherGcrypt; + struct QCryptoCipherGcrypt { + gcry_cipher_hd_t handle; +- gcry_cipher_hd_t tweakhandle; + size_t blocksize; ++#ifdef CONFIG_QEMU_PRIVATE_XTS ++ gcry_cipher_hd_t tweakhandle; + /* Initialization vector or Counter */ + uint8_t *iv; ++#endif + }; + + static void +@@ -74,10 +78,12 @@ qcrypto_gcrypt_cipher_free_ctx(QCryptoCipherGcrypt *ctx, + } + + gcry_cipher_close(ctx->handle); ++#ifdef CONFIG_QEMU_PRIVATE_XTS + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + gcry_cipher_close(ctx->tweakhandle); + } + g_free(ctx->iv); ++#endif + g_free(ctx); + } + +@@ -94,8 +100,14 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: ++ gcrymode = GCRY_CIPHER_MODE_ECB; ++ break; + case QCRYPTO_CIPHER_MODE_XTS: ++#ifdef CONFIG_QEMU_PRIVATE_XTS + gcrymode = GCRY_CIPHER_MODE_ECB; ++#else ++ gcrymode = GCRY_CIPHER_MODE_XTS; ++#endif + break; + case QCRYPTO_CIPHER_MODE_CBC: + gcrymode = GCRY_CIPHER_MODE_CBC; +@@ -172,6 +184,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + gcry_strerror(err)); + goto error; + } ++#ifdef CONFIG_QEMU_PRIVATE_XTS + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0); + if (err != 0) { +@@ -180,6 +193,7 @@ static QCryptoCipherGcrypt 
*qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + goto error; + } + } ++#endif + + if (alg == QCRYPTO_CIPHER_ALG_DES_RFB) { + /* We're using standard DES cipher from gcrypt, so we need +@@ -191,6 +205,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + g_free(rfbkey); + ctx->blocksize = 8; + } else { ++#ifdef CONFIG_QEMU_PRIVATE_XTS + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + nkey /= 2; + err = gcry_cipher_setkey(ctx->handle, key, nkey); +@@ -201,8 +216,11 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + } + err = gcry_cipher_setkey(ctx->tweakhandle, key + nkey, nkey); + } else { ++#endif + err = gcry_cipher_setkey(ctx->handle, key, nkey); ++#ifdef CONFIG_QEMU_PRIVATE_XTS + } ++#endif + if (err != 0) { + error_setg(errp, "Cannot set key: %s", + gcry_strerror(err)); +@@ -228,6 +246,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + } + } + ++#ifdef CONFIG_QEMU_PRIVATE_XTS + if (mode == QCRYPTO_CIPHER_MODE_XTS) { + if (ctx->blocksize != XTS_BLOCK_SIZE) { + error_setg(errp, +@@ -237,6 +256,7 @@ static QCryptoCipherGcrypt *qcrypto_cipher_ctx_new(QCryptoCipherAlgorithm alg, + } + ctx->iv = g_new0(uint8_t, ctx->blocksize); + } ++#endif + + return ctx; + +@@ -253,6 +273,7 @@ qcrypto_gcrypt_cipher_ctx_free(QCryptoCipher *cipher) + } + + ++#ifdef CONFIG_QEMU_PRIVATE_XTS + static void qcrypto_gcrypt_xts_encrypt(const void *ctx, + size_t length, + uint8_t *dst, +@@ -272,6 +293,7 @@ static void qcrypto_gcrypt_xts_decrypt(const void *ctx, + err = gcry_cipher_decrypt((gcry_cipher_hd_t)ctx, dst, length, src, length); + g_assert(err == 0); + } ++#endif + + static int + qcrypto_gcrypt_cipher_encrypt(QCryptoCipher *cipher, +@@ -289,20 +311,23 @@ qcrypto_gcrypt_cipher_encrypt(QCryptoCipher *cipher, + return -1; + } + ++#ifdef CONFIG_QEMU_PRIVATE_XTS + if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) { + xts_encrypt(ctx->handle, ctx->tweakhandle, + qcrypto_gcrypt_xts_encrypt, + 
qcrypto_gcrypt_xts_decrypt, + ctx->iv, len, out, in); +- } else { +- err = gcry_cipher_encrypt(ctx->handle, +- out, len, +- in, len); +- if (err != 0) { +- error_setg(errp, "Cannot encrypt data: %s", +- gcry_strerror(err)); +- return -1; +- } ++ return 0; ++ } ++#endif ++ ++ err = gcry_cipher_encrypt(ctx->handle, ++ out, len, ++ in, len); ++ if (err != 0) { ++ error_setg(errp, "Cannot encrypt data: %s", ++ gcry_strerror(err)); ++ return -1; + } + + return 0; +@@ -325,20 +350,23 @@ qcrypto_gcrypt_cipher_decrypt(QCryptoCipher *cipher, + return -1; + } + ++#ifdef CONFIG_QEMU_PRIVATE_XTS + if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) { + xts_decrypt(ctx->handle, ctx->tweakhandle, + qcrypto_gcrypt_xts_encrypt, + qcrypto_gcrypt_xts_decrypt, + ctx->iv, len, out, in); +- } else { +- err = gcry_cipher_decrypt(ctx->handle, +- out, len, +- in, len); +- if (err != 0) { +- error_setg(errp, "Cannot decrypt data: %s", +- gcry_strerror(err)); +- return -1; +- } ++ return 0; ++ } ++#endif ++ ++ err = gcry_cipher_decrypt(ctx->handle, ++ out, len, ++ in, len); ++ if (err != 0) { ++ error_setg(errp, "Cannot decrypt data: %s", ++ gcry_strerror(err)); ++ return -1; + } + + return 0; +@@ -358,24 +386,27 @@ qcrypto_gcrypt_cipher_setiv(QCryptoCipher *cipher, + return -1; + } + ++#ifdef CONFIG_QEMU_PRIVATE_XTS + if (ctx->iv) { + memcpy(ctx->iv, iv, niv); +- } else { +- if (cipher->mode == QCRYPTO_CIPHER_MODE_CTR) { +- err = gcry_cipher_setctr(ctx->handle, iv, niv); +- if (err != 0) { +- error_setg(errp, "Cannot set Counter: %s", ++ return 0; ++ } ++#endif ++ ++ if (cipher->mode == QCRYPTO_CIPHER_MODE_CTR) { ++ err = gcry_cipher_setctr(ctx->handle, iv, niv); ++ if (err != 0) { ++ error_setg(errp, "Cannot set Counter: %s", + gcry_strerror(err)); +- return -1; +- } +- } else { +- gcry_cipher_reset(ctx->handle); +- err = gcry_cipher_setiv(ctx->handle, iv, niv); +- if (err != 0) { +- error_setg(errp, "Cannot set IV: %s", ++ return -1; ++ } ++ } else { ++ gcry_cipher_reset(ctx->handle); ++ err = 
gcry_cipher_setiv(ctx->handle, iv, niv); ++ if (err != 0) { ++ error_setg(errp, "Cannot set IV: %s", + gcry_strerror(err)); +- return -1; +- } ++ return -1; + } + } + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index d6de4e1042..3be60ab999 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -132,7 +132,7 @@ check-unit-y += tests/test-base64$(EXESUF) + check-unit-$(call land,$(CONFIG_BLOCK),$(if $(CONFIG_NETTLE),y,$(CONFIG_GCRYPT))) += tests/test-crypto-pbkdf$(EXESUF) + check-unit-$(CONFIG_BLOCK) += tests/test-crypto-ivgen$(EXESUF) + check-unit-$(CONFIG_BLOCK) += tests/test-crypto-afsplit$(EXESUF) +-check-unit-$(CONFIG_BLOCK) += tests/test-crypto-xts$(EXESUF) ++check-unit-$(if $(CONFIG_BLOCK),$(CONFIG_QEMU_PRIVATE_XTS)) += tests/test-crypto-xts$(EXESUF) + check-unit-$(CONFIG_BLOCK) += tests/test-crypto-block$(EXESUF) + check-unit-y += tests/test-logging$(EXESUF) + check-unit-$(call land,$(CONFIG_BLOCK),$(CONFIG_REPLICATION)) += tests/test-replication$(EXESUF) +-- +2.27.0 + diff --git a/crypto-add-support-for-nettle-s-native-XTS-impl.patch b/crypto-add-support-for-nettle-s-native-XTS-impl.patch new file mode 100644 index 0000000000000000000000000000000000000000..5aed7d626edf019c94c64939f35357274e39136a --- /dev/null +++ b/crypto-add-support-for-nettle-s-native-XTS-impl.patch @@ -0,0 +1,126 @@ +From c4db6fcb2c45b800cd46e088f8265ccc0631b6fc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 14 Oct 2019 17:28:27 +0100 +Subject: [PATCH] crypto: add support for nettle's native XTS impl +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Nettle 3.5.0 will add support for the XTS mode. Use this because long +term we wish to delete QEMU's XTS impl to avoid carrying private crypto +algorithm impls. + +Unfortunately this degrades nettle performance from 612 MB/s to 568 MB/s +as nettle's XTS impl isn't so well optimized yet. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefano Garzarella +Signed-off-by: Daniel P. Berrangé +--- + configure | 18 ++++++++++++++++++ + crypto/cipher-nettle.c | 18 ++++++++++++++++++ + 2 files changed, 36 insertions(+) + +diff --git a/configure b/configure +index 577533e9ed..5dcaac3b95 100755 +--- a/configure ++++ b/configure +@@ -473,6 +473,7 @@ gtk_gl="no" + tls_priority="NORMAL" + gnutls="" + nettle="" ++nettle_xts="no" + gcrypt="" + gcrypt_hmac="no" + auth_pam="" +@@ -2918,6 +2919,19 @@ if test "$nettle" != "no"; then + pass="yes" + fi + fi ++ if test "$pass" = "yes" ++ then ++ cat > $TMPC << EOF ++#include ++int main(void) { ++ return 0; ++} ++EOF ++ if compile_prog "$nettle_cflags" "$nettle_libs" ; then ++ nettle_xts=yes ++ qemu_private_xts=no ++ fi ++ fi + if test "$pass" = "no" && test "$nettle" = "yes"; then + feature_not_found "nettle" "Install nettle devel >= 2.7.1" + else +@@ -6391,6 +6405,10 @@ echo "TLS priority $tls_priority" + echo "GNUTLS support $gnutls" + echo "libgcrypt $gcrypt" + echo "nettle $nettle $(echo_version $nettle $nettle_version)" ++if test "$nettle" = "yes" ++then ++ echo " XTS $nettle_xts" ++fi + echo "libtasn1 $tasn1" + echo "PAM $auth_pam" + echo "iconv support $iconv" +diff --git a/crypto/cipher-nettle.c b/crypto/cipher-nettle.c +index d7411bb8ff..7e9a4cc199 100644 +--- a/crypto/cipher-nettle.c ++++ b/crypto/cipher-nettle.c +@@ -19,7 +19,9 @@ + */ + + #include "qemu/osdep.h" ++#ifdef CONFIG_QEMU_PRIVATE_XTS + #include "crypto/xts.h" ++#endif + #include "cipherpriv.h" + + #include +@@ -30,6 +32,9 @@ + #include + #include + #include ++#ifndef CONFIG_QEMU_PRIVATE_XTS ++#include ++#endif + + typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx, + size_t length, +@@ -626,9 +631,15 @@ qcrypto_nettle_cipher_encrypt(QCryptoCipher *cipher, + break; + + case QCRYPTO_CIPHER_MODE_XTS: ++#ifdef CONFIG_QEMU_PRIVATE_XTS + xts_encrypt(ctx->ctx, ctx->ctx_tweak, + ctx->alg_encrypt_wrapper, ctx->alg_encrypt_wrapper, + 
ctx->iv, len, out, in); ++#else ++ xts_encrypt_message(ctx->ctx, ctx->ctx_tweak, ++ ctx->alg_encrypt_native, ++ ctx->iv, len, out, in); ++#endif + break; + + case QCRYPTO_CIPHER_MODE_CTR: +@@ -673,9 +684,16 @@ qcrypto_nettle_cipher_decrypt(QCryptoCipher *cipher, + break; + + case QCRYPTO_CIPHER_MODE_XTS: ++#ifdef CONFIG_QEMU_PRIVATE_XTS + xts_decrypt(ctx->ctx, ctx->ctx_tweak, + ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper, + ctx->iv, len, out, in); ++#else ++ xts_decrypt_message(ctx->ctx, ctx->ctx_tweak, ++ ctx->alg_decrypt_native, ++ ctx->alg_encrypt_native, ++ ctx->iv, len, out, in); ++#endif + break; + case QCRYPTO_CIPHER_MODE_CTR: + ctr_crypt(ctx->ctx, ctx->alg_encrypt_native, +-- +2.27.0 + diff --git a/delete-the-in-tpm.txt.patch b/delete-the-in-tpm.txt.patch new file mode 100644 index 0000000000000000000000000000000000000000..01ce3ace541aca115bccd47100f5dbd954643764 --- /dev/null +++ b/delete-the-in-tpm.txt.patch @@ -0,0 +1,26 @@ +From 3020ae141ef40f06b17eb0f16d2a3c6d5872ff89 Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Wed, 29 Jul 2020 08:45:50 +0000 +Subject: [PATCH 05/19] delete the in tpm.txt + +Signed-off-by: jiangfangjie +--- + docs/specs/tpm.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt +index 5d8c26b1..9c8cca04 100644 +--- a/docs/specs/tpm.txt ++++ b/docs/specs/tpm.txt +@@ -89,7 +89,7 @@ TPM upon reboot. The PPI specification defines the operation requests and the + actions the firmware has to take. The system administrator passes the operation + request number to the firmware through an ACPI interface which writes this + number to a memory location that the firmware knows. Upon reboot, the firmware +-finds the number and sends commands to the the TPM. The firmware writes the TPM ++finds the number and sends commands to the TPM. 
The firmware writes the TPM + result code and the operation request number to a memory location that ACPI can + read from and pass the result on to the administrator. + +-- +2.23.0 + diff --git a/display-bochs-display-fix-memory-leak.patch b/display-bochs-display-fix-memory-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..4dd3aa61c2b2b1026e0065c708ead4aeb79b3c21 --- /dev/null +++ b/display-bochs-display-fix-memory-leak.patch @@ -0,0 +1,35 @@ +From 7edca67dc630e31043644e87ede2e05e504f845b Mon Sep 17 00:00:00 2001 +From: Cameron Esfahani +Date: Tue, 10 Dec 2019 13:27:54 -0800 +Subject: [PATCH 1/8] display/bochs-display: fix memory leak +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix memory leak in bochs_display_update(). Leaks 304 bytes per frame. + +Fixes: 33ebad54056 +Signed-off-by: Cameron Esfahani +Message-Id: +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Gerd Hoffmann +--- + hw/display/bochs-display.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/display/bochs-display.c b/hw/display/bochs-display.c +index 8e83b51..b601b2f 100644 +--- a/hw/display/bochs-display.c ++++ b/hw/display/bochs-display.c +@@ -251,6 +251,8 @@ static void bochs_display_update(void *opaque) + dpy_gfx_update(s->con, 0, ys, + mode.width, y - ys); + } ++ ++ g_free(snap); + } + } + +-- +1.8.3.1 + diff --git a/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch b/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch new file mode 100644 index 0000000000000000000000000000000000000000..c61c9fd848c4e1d68baa778388c8440a8d28ec32 --- /dev/null +++ b/dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch @@ -0,0 +1,79 @@ +From fbde196c30e4797a51bda046ba514b187963d4ba Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 29 Jul 2019 23:34:16 +0200 +Subject: [PATCH] dma-helpers: ensure AIO callback is invoked after + cancellation + +dma_aio_cancel unschedules the BH if there is one, 
which corresponds +to the reschedule_dma case of dma_blk_cb. This can stall the DMA +permanently, because dma_complete will never get invoked and therefore +nobody will ever invoke the original AIO callback in dbs->common.cb. + +Fix this by invoking the callback (which is ensured to happen after +a bdrv_aio_cancel_async, or done manually in the dbs->bh case), and +add assertions to check that the DMA state machine is indeed waiting +for dma_complete or reschedule_dma, but never both. + +Reported-by: John Snow +Signed-off-by: Paolo Bonzini +Message-id: 20190729213416.1972-1-pbonzini@redhat.com +Signed-off-by: John Snow +(cherry picked from commit 539343c0a47e19d5dd64d846d64d084d9793681f) +Signed-off-by: Michael Roth +--- + dma-helpers.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/dma-helpers.c b/dma-helpers.c +index 2d7e02d35e..d3871dc61e 100644 +--- a/dma-helpers.c ++++ b/dma-helpers.c +@@ -90,6 +90,7 @@ static void reschedule_dma(void *opaque) + { + DMAAIOCB *dbs = (DMAAIOCB *)opaque; + ++ assert(!dbs->acb && dbs->bh); + qemu_bh_delete(dbs->bh); + dbs->bh = NULL; + dma_blk_cb(dbs, 0); +@@ -111,15 +112,12 @@ static void dma_complete(DMAAIOCB *dbs, int ret) + { + trace_dma_complete(dbs, ret, dbs->common.cb); + ++ assert(!dbs->acb && !dbs->bh); + dma_blk_unmap(dbs); + if (dbs->common.cb) { + dbs->common.cb(dbs->common.opaque, ret); + } + qemu_iovec_destroy(&dbs->iov); +- if (dbs->bh) { +- qemu_bh_delete(dbs->bh); +- dbs->bh = NULL; +- } + qemu_aio_unref(dbs); + } + +@@ -179,14 +177,21 @@ static void dma_aio_cancel(BlockAIOCB *acb) + + trace_dma_aio_cancel(dbs); + ++ assert(!(dbs->acb && dbs->bh)); + if (dbs->acb) { ++ /* This will invoke dma_blk_cb. 
*/ + blk_aio_cancel_async(dbs->acb); ++ return; + } ++ + if (dbs->bh) { + cpu_unregister_map_client(dbs->bh); + qemu_bh_delete(dbs->bh); + dbs->bh = NULL; + } ++ if (dbs->common.cb) { ++ dbs->common.cb(dbs->common.opaque, -ECANCELED); ++ } + } + + static AioContext *dma_get_aio_context(BlockAIOCB *acb) +-- +2.23.0 diff --git a/doc-Update-multi-thread-compression-doc.patch b/doc-Update-multi-thread-compression-doc.patch new file mode 100644 index 0000000000000000000000000000000000000000..33ef835acaf040ed3a404b6f0314a9e5c4af0673 --- /dev/null +++ b/doc-Update-multi-thread-compression-doc.patch @@ -0,0 +1,85 @@ +From 642df85795097017e9370a9721f702cbec50c173 Mon Sep 17 00:00:00 2001 +From: Zeyu Jin +Date: Sat, 30 Jan 2021 16:36:47 +0800 +Subject: [PATCH] doc: Update multi-thread compression doc + +Modify the doc to fit the previous changes. + +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + docs/multi-thread-compression.txt | 31 ++++++++++++++++++------------- + 1 file changed, 18 insertions(+), 13 deletions(-) + +diff --git a/docs/multi-thread-compression.txt b/docs/multi-thread-compression.txt +index bb88c6bdf1..d429963cb0 100644 +--- a/docs/multi-thread-compression.txt ++++ b/docs/multi-thread-compression.txt +@@ -33,14 +33,15 @@ thread compression can be used to accelerate the compression process. + + The decompression speed of Zlib is at least 4 times as quick as + compression, if the source and destination CPU have equal speed, +-keeping the compression thread count 4 times the decompression +-thread count can avoid resource waste. ++and you choose Zlib as compression method, keeping the compression ++thread count 4 times the decompression thread count can avoid resource waste. + + Compression level can be used to control the compression speed and the +-compression ratio. High compression ratio will take more time, level 0 +-stands for no compression, level 1 stands for the best compression +-speed, and level 9 stands for the best compression ratio. 
Users can +-select a level number between 0 and 9. ++compression ratio. High compression ratio will take more time, ++level 1 stands for the best compression speed, and higher level means higher ++compression ratio. For Zlib, users can select a level number between 0 and 9, ++where level 0 stands for no compression. For Zstd, users can select a ++level number between 1 and 22. + + + When to use the multiple thread compression in live migration +@@ -116,16 +117,19 @@ to support the multiple thread compression migration: + 2. Activate compression on the source: + {qemu} migrate_set_capability compress on + +-3. Set the compression thread count on source: ++3. Set the compression method: ++ {qemu} migrate_set_parameter compress_method zstd ++ ++4. Set the compression thread count on source: + {qemu} migrate_set_parameter compress_threads 12 + +-4. Set the compression level on the source: ++5. Set the compression level on the source: + {qemu} migrate_set_parameter compress_level 1 + +-5. Set the decompression thread count on destination: ++6. Set the decompression thread count on destination: + {qemu} migrate_set_parameter decompress_threads 3 + +-6. Start outgoing migration: ++7. Start outgoing migration: + {qemu} migrate -d tcp:destination.host:4444 + {qemu} info migrate + Capabilities: ... compress: on +@@ -136,6 +140,7 @@ The following are the default settings: + compress_threads: 8 + decompress_threads: 2 + compress_level: 1 (which means best speed) ++ compress_method: zlib + + So, only the first two steps are required to use the multiple + thread compression in migration. You can do more if the default +@@ -143,7 +148,7 @@ settings are not appropriate. + + TODO + ==== +-Some faster (de)compression method such as LZ4 and Quicklz can help +-to reduce the CPU consumption when doing (de)compression. 
If using +-these faster (de)compression method, less (de)compression threads ++Compared to Zlib, some faster (de)compression methods such as LZ4 ++and Quicklz can help to reduce the CPU consumption when doing (de)compression. ++If using these faster (de)compression method, less (de)compression threads + are needed when doing the migration. +-- +2.27.0 + diff --git a/docs-specs-Add-ACPI-GED-documentation.patch b/docs-specs-Add-ACPI-GED-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..46e8c17483ba33af2b75e954233c3cbdc5c7cddc --- /dev/null +++ b/docs-specs-Add-ACPI-GED-documentation.patch @@ -0,0 +1,107 @@ +From 9c1752703fb8a5b70985cf4c9caabc3388c5953b Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 18 Sep 2019 14:06:31 +0100 +Subject: [PATCH] docs/specs: Add ACPI GED documentation + +Documents basic concepts of ACPI Generic Event device(GED) +and interface between QEMU and the ACPI BIOS. + +Signed-off-by: Shameer Kolothum +Reviewed-by: Eric Auger +Message-Id: <20190918130633.4872-10-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + docs/specs/acpi_hw_reduced_hotplug.rst | 70 ++++++++++++++++++++++++++ + docs/specs/index.rst | 1 + + 2 files changed, 71 insertions(+) + create mode 100644 docs/specs/acpi_hw_reduced_hotplug.rst + +diff --git a/docs/specs/acpi_hw_reduced_hotplug.rst b/docs/specs/acpi_hw_reduced_hotplug.rst +new file mode 100644 +index 0000000000..911a98255b +--- /dev/null ++++ b/docs/specs/acpi_hw_reduced_hotplug.rst +@@ -0,0 +1,70 @@ ++================================================== ++QEMU and ACPI BIOS Generic Event Device interface ++================================================== ++ ++The ACPI *Generic Event Device* (GED) is a HW reduced platform ++specific device introduced in ACPI v6.1 that handles all platform ++events, including the hotplug ones. 
GED is modelled as a device ++in the namespace with a _HID defined to be ACPI0013. This document ++describes the interface between QEMU and the ACPI BIOS. ++ ++GED allows HW reduced platforms to handle interrupts in ACPI ASL ++statements. It follows a very similar approach to the _EVT method ++from GPIO events. All interrupts are listed in _CRS and the handler ++is written in _EVT method. However, the QEMU implementation uses a ++single interrupt for the GED device, relying on an IO memory region ++to communicate the type of device affected by the interrupt. This way, ++we can support up to 32 events with a unique interrupt. ++ ++**Here is an example,** ++ ++:: ++ ++ Device (\_SB.GED) ++ { ++ Name (_HID, "ACPI0013") ++ Name (_UID, Zero) ++ Name (_CRS, ResourceTemplate () ++ { ++ Interrupt (ResourceConsumer, Edge, ActiveHigh, Exclusive, ,, ) ++ { ++ 0x00000029, ++ } ++ }) ++ OperationRegion (EREG, SystemMemory, 0x09080000, 0x04) ++ Field (EREG, DWordAcc, NoLock, WriteAsZeros) ++ { ++ ESEL, 32 ++ } ++ Method (_EVT, 1, Serialized) ++ { ++ Local0 = ESEL // ESEL = IO memory region which specifies the ++ // device type. ++ If (((Local0 & One) == One)) ++ { ++ MethodEvent1() ++ } ++ If ((Local0 & 0x2) == 0x2) ++ { ++ MethodEvent2() ++ } ++ ... ++ } ++ } ++ ++GED IO interface (4 byte access) ++-------------------------------- ++**read access:** ++ ++:: ++ ++ [0x0-0x3] Event selector bit field (32 bit) set by QEMU. 
++ ++ bits: ++ 0: Memory hotplug event ++ 1: System power down event ++ 2-31: Reserved ++ ++**write_access:** ++ ++Nothing is expected to be written into GED IO memory +diff --git a/docs/specs/index.rst b/docs/specs/index.rst +index 40adb97c5e..984ba44029 100644 +--- a/docs/specs/index.rst ++++ b/docs/specs/index.rst +@@ -12,3 +12,4 @@ Contents: + + ppc-xive + ppc-spapr-xive ++ acpi_hw_reduced_hotplug +-- +2.19.1 diff --git a/docs-specs-tpm-Document-TPM_TIS-sysbus-device-for-AR.patch b/docs-specs-tpm-Document-TPM_TIS-sysbus-device-for-AR.patch new file mode 100644 index 0000000000000000000000000000000000000000..f0be64a937fd5b1f78c54f5f74854f388c023786 --- /dev/null +++ b/docs-specs-tpm-Document-TPM_TIS-sysbus-device-for-AR.patch @@ -0,0 +1,66 @@ +From dd7f6cc3bcd71681920e3530f2c53041c812c5d3 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 5 Mar 2020 17:51:46 +0100 +Subject: [PATCH 16/19] docs/specs/tpm: Document TPM_TIS sysbus device for ARM + +Update the documentation with recent changes related to the +sysbus TPM_TIS device addition and add the command line +to be used with arm VIRT. + +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Message-id: 20200305165149.618-8-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + docs/specs/tpm.rst | 25 ++++++++++++++++++++++++- + 1 file changed, 24 insertions(+), 1 deletion(-) + +diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst +index 2bdf637f..da9eb39c 100644 +--- a/docs/specs/tpm.rst ++++ b/docs/specs/tpm.rst +@@ -18,9 +18,15 @@ The TIS interface makes a memory mapped IO region in the area + 0xfed40000-0xfed44fff available to the guest operating system. + + QEMU files related to TPM TIS interface: +- - ``hw/tpm/tpm_tis.c`` ++ - ``hw/tpm/tpm_tis_common.c`` ++ - ``hw/tpm/tpm_tis_isa.c`` ++ - ``hw/tpm/tpm_tis_sysbus.c`` + - ``hw/tpm/tpm_tis.h`` + ++Both an ISA device and a sysbus device are available. 
The former is ++used with pc/q35 machine while the latter can be instantiated in the ++ARM virt machine. ++ + CRB interface + ------------- + +@@ -325,6 +331,23 @@ In case a pSeries machine is emulated, use the following command line: + -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ + -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 + ++In case an ARM virt machine is emulated, use the following command line: ++ ++.. code-block:: console ++ ++ qemu-system-aarch64 -machine virt,gic-version=3,accel=kvm \ ++ -cpu host -m 4G \ ++ -nographic -no-acpi \ ++ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ ++ -tpmdev emulator,id=tpm0,chardev=chrtpm \ ++ -device tpm-tis-device,tpmdev=tpm0 \ ++ -device virtio-blk-pci,drive=drv0 \ ++ -drive format=qcow2,file=hda.qcow2,if=none,id=drv0 \ ++ -drive if=pflash,format=raw,file=flash0.img,readonly \ ++ -drive if=pflash,format=raw,file=flash1.img ++ ++ On ARM, ACPI boot with TPM is not yet supported. ++ + In case SeaBIOS is used as firmware, it should show the TPM menu item + after entering the menu with 'ESC'. 
+ +-- +2.23.0 + diff --git a/docs-specs-tpm-reST-ify-TPM-documentation.patch b/docs-specs-tpm-reST-ify-TPM-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..d4648994bde2fc4b68ce49f28f4a612f53e65551 --- /dev/null +++ b/docs-specs-tpm-reST-ify-TPM-documentation.patch @@ -0,0 +1,993 @@ +From 5d1865496ca39f08142a0c1eb2c9b14ec1ec9140 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Tue, 21 Jan 2020 10:29:35 -0500 +Subject: [PATCH 09/19] docs/specs/tpm: reST-ify TPM documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Marc-André Lureau +Reviewed-by: Stefan Berger +Message-Id: <20200121152935.649898-7-stefanb@linux.ibm.com> +Signed-off-by: David Gibson +Signed-off-by: jiangfangjie +--- + docs/specs/index.rst | 1 + + docs/specs/tpm.rst | 503 +++++++++++++++++++++++++++++++++++++++++++ + docs/specs/tpm.txt | 445 -------------------------------------- + 3 files changed, 504 insertions(+), 445 deletions(-) + create mode 100644 docs/specs/tpm.rst + delete mode 100644 docs/specs/tpm.txt + +diff --git a/docs/specs/index.rst b/docs/specs/index.rst +index 984ba440..de46a8b5 100644 +--- a/docs/specs/index.rst ++++ b/docs/specs/index.rst +@@ -13,3 +13,4 @@ Contents: + ppc-xive + ppc-spapr-xive + acpi_hw_reduced_hotplug ++ tpm +diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst +new file mode 100644 +index 00000000..2bdf637f +--- /dev/null ++++ b/docs/specs/tpm.rst +@@ -0,0 +1,503 @@ ++=============== ++QEMU TPM Device ++=============== ++ ++Guest-side hardware interface ++============================= ++ ++TIS interface ++------------- ++ ++The QEMU TPM emulation implements a TPM TIS hardware interface ++following the Trusted Computing Group's specification "TCG PC Client ++Specific TPM Interface Specification (TIS)", Specification Version ++1.3, 21 March 2013. (see the `TIS specification`_, or a later version ++of it). 
++ ++The TIS interface makes a memory mapped IO region in the area ++0xfed40000-0xfed44fff available to the guest operating system. ++ ++QEMU files related to TPM TIS interface: ++ - ``hw/tpm/tpm_tis.c`` ++ - ``hw/tpm/tpm_tis.h`` ++ ++CRB interface ++------------- ++ ++QEMU also implements a TPM CRB interface following the Trusted ++Computing Group's specification "TCG PC Client Platform TPM Profile ++(PTP) Specification", Family "2.0", Level 00 Revision 01.03 v22, May ++22, 2017. (see the `CRB specification`_, or a later version of it) ++ ++The CRB interface makes a memory mapped IO region in the area ++0xfed40000-0xfed40fff (1 locality) available to the guest ++operating system. ++ ++QEMU files related to TPM CRB interface: ++ - ``hw/tpm/tpm_crb.c`` ++ ++SPAPR interface ++--------------- ++ ++pSeries (ppc64) machines offer a tpm-spapr device model. ++ ++QEMU files related to the SPAPR interface: ++ - ``hw/tpm/tpm_spapr.c`` ++ ++fw_cfg interface ++================ ++ ++The bios/firmware may read the ``"etc/tpm/config"`` fw_cfg entry for ++configuring the guest appropriately. ++ ++The entry of 6 bytes has the following content, in little-endian: ++ ++.. code-block:: c ++ ++ #define TPM_VERSION_UNSPEC 0 ++ #define TPM_VERSION_1_2 1 ++ #define TPM_VERSION_2_0 2 ++ ++ #define TPM_PPI_VERSION_NONE 0 ++ #define TPM_PPI_VERSION_1_30 1 ++ ++ struct FwCfgTPMConfig { ++ uint32_t tpmppi_address; /* PPI memory location */ ++ uint8_t tpm_version; /* TPM version */ ++ uint8_t tpmppi_version; /* PPI version */ ++ }; ++ ++ACPI interface ++============== ++ ++The TPM device is defined with ACPI ID "PNP0C31". QEMU builds a SSDT ++and passes it into the guest through the fw_cfg device. The device ++description contains the base address of the TIS interface 0xfed40000 ++and the size of the MMIO area (0x5000). In case a TPM2 is used by ++QEMU, a TPM2 ACPI table is also provided. 
The device is described to ++be used in polling mode rather than interrupt mode primarily because ++no unused IRQ could be found. ++ ++To support measurement logs to be written by the firmware, ++e.g. SeaBIOS, a TCPA table is implemented. This table provides a 64kb ++buffer where the firmware can write its log into. For TPM 2 only a ++more recent version of the TPM2 table provides support for ++measurements logs and a TCPA table does not need to be created. ++ ++The TCPA and TPM2 ACPI tables follow the Trusted Computing Group ++specification "TCG ACPI Specification" Family "1.2" and "2.0", Level ++00 Revision 00.37. (see the `ACPI specification`_, or a later version ++of it) ++ ++ACPI PPI Interface ++------------------ ++ ++QEMU supports the Physical Presence Interface (PPI) for TPM 1.2 and ++TPM 2. This interface requires ACPI and firmware support. (see the ++`PPI specification`_) ++ ++PPI enables a system administrator (root) to request a modification to ++the TPM upon reboot. The PPI specification defines the operation ++requests and the actions the firmware has to take. The system ++administrator passes the operation request number to the firmware ++through an ACPI interface which writes this number to a memory ++location that the firmware knows. Upon reboot, the firmware finds the ++number and sends commands to the TPM. The firmware writes the TPM ++result code and the operation request number to a memory location that ++ACPI can read from and pass the result on to the administrator. ++ ++The PPI specification defines a set of mandatory and optional ++operations for the firmware to implement. The ACPI interface also ++allows an administrator to list the supported operations. In QEMU the ++ACPI code is generated by QEMU, yet the firmware needs to implement ++support on a per-operations basis, and different firmwares may support ++a different subset. 
Therefore, QEMU introduces the virtual memory ++device for PPI where the firmware can indicate which operations it ++supports and ACPI can enable the ones that are supported and disable ++all others. This interface lies in main memory and has the following ++layout: ++ ++ +-------------+--------+--------+-------------------------------------------+ ++ | Field | Length | Offset | Description | ++ +=============+========+========+===========================================+ ++ | ``func`` | 0x100 | 0x000 | Firmware sets values for each supported | ++ | | | | operation. See defined values below. | ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``ppin`` | 0x1 | 0x100 | SMI interrupt to use. Set by firmware. | ++ | | | | Not supported. | ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``ppip`` | 0x4 | 0x101 | ACPI function index to pass to SMM code. | ++ | | | | Set by ACPI. Not supported. | ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``pprp`` | 0x4 | 0x105 | Result of last executed operation. Set by | ++ | | | | firmware. See function index 5 for values.| ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``pprq`` | 0x4 | 0x109 | Operation request number to execute. See | ++ | | | | 'Physical Presence Interface Operation | ++ | | | | Summary' tables in specs. Set by ACPI. | ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``pprm`` | 0x4 | 0x10d | Operation request optional parameter. | ++ | | | | Values depend on operation. Set by ACPI. | ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``lppr`` | 0x4 | 0x111 | Last executed operation request number. | ++ | | | | Copied from pprq field by firmware. | ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``fret`` | 0x4 | 0x115 | Result code from SMM function. 
| ++ | | | | Not supported. | ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``res1`` | 0x40 | 0x119 | Reserved for future use | ++ +-------------+--------+--------+-------------------------------------------+ ++ |``next_step``| 0x1 | 0x159 | Operation to execute after reboot by | ++ | | | | firmware. Used by firmware. | ++ +-------------+--------+--------+-------------------------------------------+ ++ | ``movv`` | 0x1 | 0x15a | Memory overwrite variable | ++ +-------------+--------+--------+-------------------------------------------+ ++ ++The following values are supported for the ``func`` field. They ++correspond to the values used by ACPI function index 8. ++ ++ +----------+-------------------------------------------------------------+ ++ | Value | Description | ++ +==========+=============================================================+ ++ | 0 | Operation is not implemented. | ++ +----------+-------------------------------------------------------------+ ++ | 1 | Operation is only accessible through firmware. | ++ +----------+-------------------------------------------------------------+ ++ | 2 | Operation is blocked for OS by firmware configuration. | ++ +----------+-------------------------------------------------------------+ ++ | 3 | Operation is allowed and physically present user required. | ++ +----------+-------------------------------------------------------------+ ++ | 4 | Operation is allowed and physically present user is not | ++ | | required. | ++ +----------+-------------------------------------------------------------+ ++ ++The location of the table is given by the fw_cfg ``tpmppi_address`` ++field. The PPI memory region size is 0x400 (``TPM_PPI_ADDR_SIZE``) to ++leave enough room for future updates. 
++ ++QEMU files related to TPM ACPI tables: ++ - ``hw/i386/acpi-build.c`` ++ - ``include/hw/acpi/tpm.h`` ++ ++TPM backend devices ++=================== ++ ++The TPM implementation is split into two parts, frontend and ++backend. The frontend part is the hardware interface, such as the TPM ++TIS interface described earlier, and the other part is the TPM backend ++interface. The backend interfaces implement the interaction with a TPM ++device, which may be a physical or an emulated device. The split ++between the front- and backend devices allows a frontend to be ++connected with any available backend. This enables the TIS interface ++to be used with the passthrough backend or the swtpm backend. ++ ++QEMU files related to TPM backends: ++ - ``backends/tpm.c`` ++ - ``include/sysemu/tpm_backend.h`` ++ - ``include/sysemu/tpm_backend_int.h`` ++ ++The QEMU TPM passthrough device ++------------------------------- ++ ++In case QEMU is run on Linux as the host operating system it is ++possible to make the hardware TPM device available to a single QEMU ++guest. In this case the user must make sure that no other program is ++using the device, e.g., /dev/tpm0, before trying to start QEMU with ++it. ++ ++The passthrough driver uses the host's TPM device for sending TPM ++commands and receiving responses from. Besides that it accesses the ++TPM device's sysfs entry for support of command cancellation. Since ++none of the state of a hardware TPM can be migrated between hosts, ++virtual machine migration is disabled when the TPM passthrough driver ++is used. ++ ++Since the host's TPM device will already be initialized by the host's ++firmware, certain commands, e.g. ``TPM_Startup()``, sent by the ++virtual firmware for device initialization, will fail. In this case ++the firmware should not use the TPM. ++ ++Sharing the device with the host is generally not a recommended usage ++scenario for a TPM device. 
The primary reason for this is that two ++operating systems can then access the device's single set of ++resources, such as platform configuration registers ++(PCRs). Applications or kernel security subsystems, such as the Linux ++Integrity Measurement Architecture (IMA), are not expecting to share ++PCRs. ++ ++QEMU files related to the TPM passthrough device: ++ - ``hw/tpm/tpm_passthrough.c`` ++ - ``hw/tpm/tpm_util.c`` ++ - ``hw/tpm/tpm_util.h`` ++ ++ ++Command line to start QEMU with the TPM passthrough device using the host's ++hardware TPM ``/dev/tpm0``: ++ ++.. code-block:: console ++ ++ qemu-system-x86_64 -display sdl -accel kvm \ ++ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ ++ -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ ++ -device tpm-tis,tpmdev=tpm0 test.img ++ ++ ++The following commands should result in similar output inside the VM ++with a Linux kernel that either has the TPM TIS driver built-in or ++available as a module: ++ ++.. code-block:: console ++ ++ # dmesg | grep -i tpm ++ [ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) ++ ++ # dmesg | grep TCPA ++ [ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ ++ BXPCTCPA 0000001 BXPC 00000001) ++ ++ # ls -l /dev/tpm* ++ crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 ++ ++ # find /sys/devices/ | grep pcrs$ | xargs cat ++ PCR-00: 35 4E 3B CE 23 9F 38 59 ... ++ ... ++ PCR-23: 00 00 00 00 00 00 00 00 ... ++ ++The QEMU TPM emulator device ++---------------------------- ++ ++The TPM emulator device uses an external TPM emulator called 'swtpm' ++for sending TPM commands to and receiving responses from. The swtpm ++program must have been started before trying to access it through the ++TPM emulator with QEMU. ++ ++The TPM emulator implements a command channel for transferring TPM ++commands and responses as well as a control channel over which control ++commands can be sent. 
(see the `SWTPM protocol`_ specification) ++ ++The control channel serves the purpose of resetting, initializing, and ++migrating the TPM state, among other things. ++ ++The swtpm program behaves like a hardware TPM and therefore needs to ++be initialized by the firmware running inside the QEMU virtual ++machine. One necessary step for initializing the device is to send ++the TPM_Startup command to it. SeaBIOS, for example, has been ++instrumented to initialize a TPM 1.2 or TPM 2 device using this ++command. ++ ++QEMU files related to the TPM emulator device: ++ - ``hw/tpm/tpm_emulator.c`` ++ - ``hw/tpm/tpm_util.c`` ++ - ``hw/tpm/tpm_util.h`` ++ ++The following commands start the swtpm with a UnixIO control channel over ++a socket interface. They do not need to be run as root. ++ ++.. code-block:: console ++ ++ mkdir /tmp/mytpm1 ++ swtpm socket --tpmstate dir=/tmp/mytpm1 \ ++ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ ++ --log level=20 ++ ++Command line to start QEMU with the TPM emulator device communicating ++with the swtpm (x86): ++ ++.. code-block:: console ++ ++ qemu-system-x86_64 -display sdl -accel kvm \ ++ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ ++ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ ++ -tpmdev emulator,id=tpm0,chardev=chrtpm \ ++ -device tpm-tis,tpmdev=tpm0 test.img ++ ++In case a pSeries machine is emulated, use the following command line: ++ ++.. 
code-block:: console ++ ++ qemu-system-ppc64 -display sdl -machine pseries,accel=kvm \ ++ -m 1024 -bios slof.bin -boot menu=on \ ++ -nodefaults -device VGA -device pci-ohci -device usb-kbd \ ++ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ ++ -tpmdev emulator,id=tpm0,chardev=chrtpm \ ++ -device tpm-spapr,tpmdev=tpm0 \ ++ -device spapr-vscsi,id=scsi0,reg=0x00002000 \ ++ -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ ++ -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 ++ ++In case SeaBIOS is used as firmware, it should show the TPM menu item ++after entering the menu with 'ESC'. ++ ++.. code-block:: console ++ ++ Select boot device: ++ 1. DVD/CD [ata1-0: QEMU DVD-ROM ATAPI-4 DVD/CD] ++ [...] ++ 5. Legacy option rom ++ ++ t. TPM Configuration ++ ++The following commands should result in similar output inside the VM ++with a Linux kernel that either has the TPM TIS driver built-in or ++available as a module: ++ ++.. code-block:: console ++ ++ # dmesg | grep -i tpm ++ [ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) ++ ++ # dmesg | grep TCPA ++ [ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ ++ BXPCTCPA 0000001 BXPC 00000001) ++ ++ # ls -l /dev/tpm* ++ crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 ++ ++ # find /sys/devices/ | grep pcrs$ | xargs cat ++ PCR-00: 35 4E 3B CE 23 9F 38 59 ... ++ ... ++ PCR-23: 00 00 00 00 00 00 00 00 ... ++ ++Migration with the TPM emulator ++=============================== ++ ++The TPM emulator supports the following types of virtual machine ++migration: ++ ++- VM save / restore (migration into a file) ++- Network migration ++- Snapshotting (migration into storage like QoW2 or QED) ++ ++The following command sequences can be used to test VM save / restore. ++ ++In a 1st terminal start an instance of a swtpm using the following command: ++ ++.. 
code-block:: console ++ ++ mkdir /tmp/mytpm1 ++ swtpm socket --tpmstate dir=/tmp/mytpm1 \ ++ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ ++ --log level=20 --tpm2 ++ ++In a 2nd terminal start the VM: ++ ++.. code-block:: console ++ ++ qemu-system-x86_64 -display sdl -accel kvm \ ++ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ ++ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ ++ -tpmdev emulator,id=tpm0,chardev=chrtpm \ ++ -device tpm-tis,tpmdev=tpm0 \ ++ -monitor stdio \ ++ test.img ++ ++Verify that the attached TPM is working as expected using applications ++inside the VM. ++ ++To store the state of the VM use the following command in the QEMU ++monitor in the 2nd terminal: ++ ++.. code-block:: console ++ ++ (qemu) migrate "exec:cat > testvm.bin" ++ (qemu) quit ++ ++At this point a file called ``testvm.bin`` should exists and the swtpm ++and QEMU processes should have ended. ++ ++To test 'VM restore' you have to start the swtpm with the same ++parameters as before. If previously a TPM 2 [--tpm2] was saved, --tpm2 ++must now be passed again on the command line. ++ ++In the 1st terminal restart the swtpm with the same command line as ++before: ++ ++.. code-block:: console ++ ++ swtpm socket --tpmstate dir=/tmp/mytpm1 \ ++ --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ ++ --log level=20 --tpm2 ++ ++In the 2nd terminal restore the state of the VM using the additional ++'-incoming' option. ++ ++.. code-block:: console ++ ++ qemu-system-x86_64 -display sdl -accel kvm \ ++ -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ ++ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ ++ -tpmdev emulator,id=tpm0,chardev=chrtpm \ ++ -device tpm-tis,tpmdev=tpm0 \ ++ -incoming "exec:cat < testvm.bin" \ ++ test.img ++ ++Troubleshooting migration ++------------------------- ++ ++There are several reasons why migration may fail. 
In case of problems, ++please ensure that the command lines adhere to the following rules ++and, if possible, that identical versions of QEMU and swtpm are used ++at all times. ++ ++VM save and restore: ++ ++ - QEMU command line parameters should be identical apart from the ++ '-incoming' option on VM restore ++ ++ - swtpm command line parameters should be identical ++ ++VM migration to 'localhost': ++ ++ - QEMU command line parameters should be identical apart from the ++ '-incoming' option on the destination side ++ ++ - swtpm command line parameters should point to two different ++ directories on the source and destination swtpm (--tpmstate dir=...) ++ (especially if different versions of libtpms were to be used on the ++ same machine). ++ ++VM migration across the network: ++ ++ - QEMU command line parameters should be identical apart from the ++ '-incoming' option on the destination side ++ ++ - swtpm command line parameters should be identical ++ ++VM Snapshotting: ++ - QEMU command line parameters should be identical ++ ++ - swtpm command line parameters should be identical ++ ++ ++Besides that, migration failure reasons on the swtpm level may include ++the following: ++ ++ - the versions of the swtpm on the source and destination sides are ++ incompatible ++ ++ - downgrading of TPM state may not be supported ++ ++ - the source and destination libtpms were compiled with different ++ compile-time options and the destination side refuses to accept the ++ state ++ ++ - different migration keys are used on the source and destination side ++ and the destination side cannot decrypt the migrated state ++ (swtpm ... --migration-key ... ) ++ ++ ++.. _TIS specification: ++ https://trustedcomputinggroup.org/pc-client-work-group-pc-client-specific-tpm-interface-specification-tis/ ++ ++.. _CRB specification: ++ https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/ ++ ++ ++.. 
_ACPI specification: ++ https://trustedcomputinggroup.org/tcg-acpi-specification/ ++ ++.. _PPI specification: ++ https://trustedcomputinggroup.org/resource/tcg-physical-presence-interface-specification/ ++ ++.. _SWTPM protocol: ++ https://github.com/stefanberger/swtpm/blob/master/man/man3/swtpm_ioctls.pod +diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt +deleted file mode 100644 +index 9c3e67d8..00000000 +--- a/docs/specs/tpm.txt ++++ /dev/null +@@ -1,445 +0,0 @@ +-QEMU TPM Device +-=============== +- +-= Guest-side Hardware Interface = +- +-The QEMU TPM emulation implements a TPM TIS hardware interface following the +-Trusted Computing Group's specification "TCG PC Client Specific TPM Interface +-Specification (TIS)", Specification Version 1.3, 21 March 2013. This +-specification, or a later version of it, can be accessed from the following +-URL: +- +-https://trustedcomputinggroup.org/pc-client-work-group-pc-client-specific-tpm-interface-specification-tis/ +- +-The TIS interface makes a memory mapped IO region in the area 0xfed40000 - +-0xfed44fff available to the guest operating system. +- +- +-QEMU files related to TPM TIS interface: +- - hw/tpm/tpm_tis.c +- - hw/tpm/tpm_tis.h +- +- +-QEMU also implements a TPM CRB interface following the Trusted Computing +-Group's specification "TCG PC Client Platform TPM Profile (PTP) +-Specification", Family "2.0", Level 00 Revision 01.03 v22, May 22, 2017. +-This specification, or a later version of it, can be accessed from the +-following URL: +- +-https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/ +- +-The CRB interface makes a memory mapped IO region in the area 0xfed40000 - +-0xfed40fff (1 locality) available to the guest operating system. +- +-QEMU files related to TPM CRB interface: +- - hw/tpm/tpm_crb.c +- +- +-pSeries (ppc64) machines offer a tpm-spapr device model. 
+- +-QEMU files related to the SPAPR interface: +- - hw/tpm/tpm_spapr.c +- +-= fw_cfg interface = +- +-The bios/firmware may read the "etc/tpm/config" fw_cfg entry for +-configuring the guest appropriately. +- +-The entry of 6 bytes has the following content, in little-endian: +- +- #define TPM_VERSION_UNSPEC 0 +- #define TPM_VERSION_1_2 1 +- #define TPM_VERSION_2_0 2 +- +- #define TPM_PPI_VERSION_NONE 0 +- #define TPM_PPI_VERSION_1_30 1 +- +- struct FwCfgTPMConfig { +- uint32_t tpmppi_address; /* PPI memory location */ +- uint8_t tpm_version; /* TPM version */ +- uint8_t tpmppi_version; /* PPI version */ +- }; +- +-= ACPI Interface = +- +-The TPM device is defined with ACPI ID "PNP0C31". QEMU builds a SSDT and passes +-it into the guest through the fw_cfg device. The device description contains +-the base address of the TIS interface 0xfed40000 and the size of the MMIO area +-(0x5000). In case a TPM2 is used by QEMU, a TPM2 ACPI table is also provided. +-The device is described to be used in polling mode rather than interrupt mode +-primarily because no unused IRQ could be found. +- +-To support measurement logs to be written by the firmware, e.g. SeaBIOS, a TCPA +-table is implemented. This table provides a 64kb buffer where the firmware can +-write its log into. For TPM 2 only a more recent version of the TPM2 table +-provides support for measurements logs and a TCPA table does not need to be +-created. +- +-The TCPA and TPM2 ACPI tables follow the Trusted Computing Group specification +-"TCG ACPI Specification" Family "1.2" and "2.0", Level 00 Revision 00.37. This +-specification, or a later version of it, can be accessed from the following +-URL: +- +-https://trustedcomputinggroup.org/tcg-acpi-specification/ +- +-== ACPI PPI Interface == +- +-QEMU supports the Physical Presence Interface (PPI) for TPM 1.2 and TPM 2. This +-interface requires ACPI and firmware support. 
The specification can be found at +-the following URL: +- +-https://trustedcomputinggroup.org/resource/tcg-physical-presence-interface-specification/ +- +-PPI enables a system administrator (root) to request a modification to the +-TPM upon reboot. The PPI specification defines the operation requests and the +-actions the firmware has to take. The system administrator passes the operation +-request number to the firmware through an ACPI interface which writes this +-number to a memory location that the firmware knows. Upon reboot, the firmware +-finds the number and sends commands to the TPM. The firmware writes the TPM +-result code and the operation request number to a memory location that ACPI can +-read from and pass the result on to the administrator. +- +-The PPI specification defines a set of mandatory and optional operations for +-the firmware to implement. The ACPI interface also allows an administrator to +-list the supported operations. In QEMU the ACPI code is generated by QEMU, yet +-the firmware needs to implement support on a per-operations basis, and +-different firmwares may support a different subset. Therefore, QEMU introduces +-the virtual memory device for PPI where the firmware can indicate which +-operations it supports and ACPI can enable the ones that are supported and +-disable all others. This interface lies in main memory and has the following +-layout: +- +- +----------+--------+--------+-------------------------------------------+ +- | Field | Length | Offset | Description | +- +----------+--------+--------+-------------------------------------------+ +- | func | 0x100 | 0x000 | Firmware sets values for each supported | +- | | | | operation. See defined values below. | +- +----------+--------+--------+-------------------------------------------+ +- | ppin | 0x1 | 0x100 | SMI interrupt to use. Set by firmware. | +- | | | | Not supported. 
| +- +----------+--------+--------+-------------------------------------------+ +- | ppip | 0x4 | 0x101 | ACPI function index to pass to SMM code. | +- | | | | Set by ACPI. Not supported. | +- +----------+--------+--------+-------------------------------------------+ +- | pprp | 0x4 | 0x105 | Result of last executed operation. Set by | +- | | | | firmware. See function index 5 for values.| +- +----------+--------+--------+-------------------------------------------+ +- | pprq | 0x4 | 0x109 | Operation request number to execute. See | +- | | | | 'Physical Presence Interface Operation | +- | | | | Summary' tables in specs. Set by ACPI. | +- +----------+--------+--------+-------------------------------------------+ +- | pprm | 0x4 | 0x10d | Operation request optional parameter. | +- | | | | Values depend on operation. Set by ACPI. | +- +----------+--------+--------+-------------------------------------------+ +- | lppr | 0x4 | 0x111 | Last executed operation request number. | +- | | | | Copied from pprq field by firmware. | +- +----------+--------+--------+-------------------------------------------+ +- | fret | 0x4 | 0x115 | Result code from SMM function. | +- | | | | Not supported. | +- +----------+--------+--------+-------------------------------------------+ +- | res1 | 0x40 | 0x119 | Reserved for future use | +- +----------+--------+--------+-------------------------------------------+ +- | next_step| 0x1 | 0x159 | Operation to execute after reboot by | +- | | | | firmware. Used by firmware. | +- +----------+--------+--------+-------------------------------------------+ +- | movv | 0x1 | 0x15a | Memory overwrite variable | +- +----------+--------+--------+-------------------------------------------+ +- +- The following values are supported for the 'func' field. They correspond +- to the values used by ACPI function index 8. 
+- +- +----------+-------------------------------------------------------------+ +- | value | Description | +- +----------+-------------------------------------------------------------+ +- | 0 | Operation is not implemented. | +- +----------+-------------------------------------------------------------+ +- | 1 | Operation is only accessible through firmware. | +- +----------+-------------------------------------------------------------+ +- | 2 | Operation is blocked for OS by firmware configuration. | +- +----------+-------------------------------------------------------------+ +- | 3 | Operation is allowed and physically present user required. | +- +----------+-------------------------------------------------------------+ +- | 4 | Operation is allowed and physically present user is not | +- | | required. | +- +----------+-------------------------------------------------------------+ +- +-The location of the table is given by the fw_cfg tpmppi_address field. +-The PPI memory region size is 0x400 (TPM_PPI_ADDR_SIZE) to leave +-enough room for future updates. +- +- +-QEMU files related to TPM ACPI tables: +- - hw/i386/acpi-build.c +- - include/hw/acpi/tpm.h +- +- +-= TPM backend devices = +- +-The TPM implementation is split into two parts, frontend and backend. The +-frontend part is the hardware interface, such as the TPM TIS interface +-described earlier, and the other part is the TPM backend interface. The backend +-interfaces implement the interaction with a TPM device, which may be a physical +-or an emulated device. The split between the front- and backend devices allows +-a frontend to be connected with any available backend. This enables the TIS +-interface to be used with the passthrough backend or the (future) swtpm backend. 
+- +- +-QEMU files related to TPM backends: +- - backends/tpm.c +- - include/sysemu/tpm_backend.h +- - include/sysemu/tpm_backend_int.h +- +- +-== The QEMU TPM passthrough device == +- +-In case QEMU is run on Linux as the host operating system it is possible to +-make the hardware TPM device available to a single QEMU guest. In this case the +-user must make sure that no other program is using the device, e.g., /dev/tpm0, +-before trying to start QEMU with it. +- +-The passthrough driver uses the host's TPM device for sending TPM commands +-and receiving responses from. Besides that it accesses the TPM device's sysfs +-entry for support of command cancellation. Since none of the state of a +-hardware TPM can be migrated between hosts, virtual machine migration is +-disabled when the TPM passthrough driver is used. +- +-Since the host's TPM device will already be initialized by the host's firmware, +-certain commands, e.g. TPM_Startup(), sent by the virtual firmware for device +-initialization, will fail. In this case the firmware should not use the TPM. +- +-Sharing the device with the host is generally not a recommended usage scenario +-for a TPM device. The primary reason for this is that two operating systems can +-then access the device's single set of resources, such as platform configuration +-registers (PCRs). Applications or kernel security subsystems, such as the +-Linux Integrity Measurement Architecture (IMA), are not expecting to share PCRs. 
+- +- +-QEMU files related to the TPM passthrough device: +- - hw/tpm/tpm_passthrough.c +- - hw/tpm/tpm_util.c +- - hw/tpm/tpm_util.h +- +- +-Command line to start QEMU with the TPM passthrough device using the host's +-hardware TPM /dev/tpm0: +- +-qemu-system-x86_64 -display sdl -accel kvm \ +- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ +- -tpmdev passthrough,id=tpm0,path=/dev/tpm0 \ +- -device tpm-tis,tpmdev=tpm0 test.img +- +-The following commands should result in similar output inside the VM with a +-Linux kernel that either has the TPM TIS driver built-in or available as a +-module: +- +-#> dmesg | grep -i tpm +-[ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) +- +-#> dmesg | grep TCPA +-[ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ +- BXPCTCPA 0000001 BXPC 00000001) +- +-#> ls -l /dev/tpm* +-crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 +- +-#> find /sys/devices/ | grep pcrs$ | xargs cat +-PCR-00: 35 4E 3B CE 23 9F 38 59 ... +-... +-PCR-23: 00 00 00 00 00 00 00 00 ... +- +- +-== The QEMU TPM emulator device == +- +-The TPM emulator device uses an external TPM emulator called 'swtpm' for +-sending TPM commands to and receiving responses from. The swtpm program +-must have been started before trying to access it through the TPM emulator +-with QEMU. +- +-The TPM emulator implements a command channel for transferring TPM commands +-and responses as well as a control channel over which control commands can +-be sent. The specification for the control channel can be found here: +- +-https://github.com/stefanberger/swtpm/blob/master/man/man3/swtpm_ioctls.pod +- +- +-The control channel serves the purpose of resetting, initializing, and +-migrating the TPM state, among other things. +- +-The swtpm program behaves like a hardware TPM and therefore needs to be +-initialized by the firmware running inside the QEMU virtual machine. +-One necessary step for initializing the device is to send the TPM_Startup +-command to it. 
SeaBIOS, for example, has been instrumented to initialize +-a TPM 1.2 or TPM 2 device using this command. +- +- +-QEMU files related to the TPM emulator device: +- - hw/tpm/tpm_emulator.c +- - hw/tpm/tpm_util.c +- - hw/tpm/tpm_util.h +- +- +-The following commands start the swtpm with a UnixIO control channel over +-a socket interface. They do not need to be run as root. +- +-mkdir /tmp/mytpm1 +-swtpm socket --tpmstate dir=/tmp/mytpm1 \ +- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ +- --log level=20 +- +-Command line to start QEMU with the TPM emulator device communicating with +-the swtpm (x86): +- +-qemu-system-x86_64 -display sdl -accel kvm \ +- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ +- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ +- -tpmdev emulator,id=tpm0,chardev=chrtpm \ +- -device tpm-tis,tpmdev=tpm0 test.img +- +-In case a pSeries machine is emulated, use the following command line: +- +-qemu-system-ppc64 -display sdl -machine pseries,accel=kvm \ +- -m 1024 -bios slof.bin -boot menu=on \ +- -nodefaults -device VGA -device pci-ohci -device usb-kbd \ +- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ +- -tpmdev emulator,id=tpm0,chardev=chrtpm \ +- -device tpm-spapr,tpmdev=tpm0 \ +- -device spapr-vscsi,id=scsi0,reg=0x00002000 \ +- -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ +- -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 +- +- +-In case SeaBIOS is used as firmware, it should show the TPM menu item +-after entering the menu with 'ESC'. +- +-Select boot device: +-1. DVD/CD [ata1-0: QEMU DVD-ROM ATAPI-4 DVD/CD] +-[...] +-5. Legacy option rom +- +-t. 
TPM Configuration +- +- +-The following commands should result in similar output inside the VM with a +-Linux kernel that either has the TPM TIS driver built-in or available as a +-module: +- +-#> dmesg | grep -i tpm +-[ 0.711310] tpm_tis 00:06: 1.2 TPM (device=id 0x1, rev-id 1) +- +-#> dmesg | grep TCPA +-[ 0.000000] ACPI: TCPA 0x0000000003FFD191C 000032 (v02 BOCHS \ +- BXPCTCPA 0000001 BXPC 00000001) +- +-#> ls -l /dev/tpm* +-crw-------. 1 root root 10, 224 Jul 11 10:11 /dev/tpm0 +- +-#> find /sys/devices/ | grep pcrs$ | xargs cat +-PCR-00: 35 4E 3B CE 23 9F 38 59 ... +-... +-PCR-23: 00 00 00 00 00 00 00 00 ... +- +- +-=== Migration with the TPM emulator === +- +-The TPM emulator supports the following types of virtual machine migration: +- +-- VM save / restore (migration into a file) +-- Network migration +-- Snapshotting (migration into storage like QoW2 or QED) +- +-The following command sequences can be used to test VM save / restore. +- +- +-In a 1st terminal start an instance of a swtpm using the following command: +- +-mkdir /tmp/mytpm1 +-swtpm socket --tpmstate dir=/tmp/mytpm1 \ +- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ +- --log level=20 --tpm2 +- +-In a 2nd terminal start the VM: +- +-qemu-system-x86_64 -display sdl -accel kvm \ +- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ +- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ +- -tpmdev emulator,id=tpm0,chardev=chrtpm \ +- -device tpm-tis,tpmdev=tpm0 \ +- -monitor stdio \ +- test.img +- +-Verify that the attached TPM is working as expected using applications inside +-the VM. +- +-To store the state of the VM use the following command in the QEMU monitor in +-the 2nd terminal: +- +-(qemu) migrate "exec:cat > testvm.bin" +-(qemu) quit +- +-At this point a file called 'testvm.bin' should exists and the swtpm and QEMU +-processes should have ended. +- +-To test 'VM restore' you have to start the swtpm with the same parameters +-as before. 
If previously a TPM 2 [--tpm2] was saved, --tpm2 must now be +-passed again on the command line. +- +-In the 1st terminal restart the swtpm with the same command line as before: +- +-swtpm socket --tpmstate dir=/tmp/mytpm1 \ +- --ctrl type=unixio,path=/tmp/mytpm1/swtpm-sock \ +- --log level=20 --tpm2 +- +-In the 2nd terminal restore the state of the VM using the additional +-'-incoming' option. +- +-qemu-system-x86_64 -display sdl -accel kvm \ +- -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ +- -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ +- -tpmdev emulator,id=tpm0,chardev=chrtpm \ +- -device tpm-tis,tpmdev=tpm0 \ +- -incoming "exec:cat < testvm.bin" \ +- test.img +- +- +-Troubleshooting migration: +- +-There are several reasons why migration may fail. In case of problems, +-please ensure that the command lines adhere to the following rules and, +-if possible, that identical versions of QEMU and swtpm are used at all +-times. +- +-VM save and restore: +- - QEMU command line parameters should be identical apart from the +- '-incoming' option on VM restore +- - swtpm command line parameters should be identical +- +-VM migration to 'localhost': +- - QEMU command line parameters should be identical apart from the +- '-incoming' option on the destination side +- - swtpm command line parameters should point to two different +- directories on the source and destination swtpm (--tpmstate dir=...) +- (especially if different versions of libtpms were to be used on the +- same machine). 
+- +-VM migration across the network: +- - QEMU command line parameters should be identical apart from the +- '-incoming' option on the destination side +- - swtpm command line parameters should be identical +- +-VM Snapshotting: +- - QEMU command line parameters should be identical +- - swtpm command line parameters should be identical +- +- +-Besides that, migration failure reasons on the swtpm level may include +-the following: +- +- - the versions of the swtpm on the source and destination sides are +- incompatible +- - downgrading of TPM state may not be supported +- - the source and destination libtpms were compiled with different +- compile-time options and the destination side refuses to accept the +- state +- - different migration keys are used on the source and destination side +- and the destination side cannot decrypt the migrated state +- (swtpm ... --migration-key ... ) +-- +2.23.0 + diff --git a/drive-backup-create-do_backup_common.patch b/drive-backup-create-do_backup_common.patch new file mode 100644 index 0000000000000000000000000000000000000000..cccbc2e967c1529f5072ac64cbad1f6de3c3aee1 --- /dev/null +++ b/drive-backup-create-do_backup_common.patch @@ -0,0 +1,163 @@ +From 98dcfbd5ee53f3be705df7acf37e8706533f494f Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 16:35:52 -0400 +Subject: [PATCH] drive-backup: create do_backup_common + +Create a common core that comprises the actual meat of what the backup API +boundary needs to do, and then switch drive-backup to use it. 
+ +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Message-id: 20190709232550.10724-3-jsnow@redhat.com +Signed-off-by: John Snow +--- + blockdev.c | 102 ++++++++++++++++++++++++++++++----------------------- + 1 file changed, 57 insertions(+), 45 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 99c92b96d2..a29838a1c8 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3469,20 +3469,16 @@ out: + aio_context_release(aio_context); + } + +-static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, +- Error **errp) ++/* Common QMP interface for drive-backup and blockdev-backup */ ++static BlockJob *do_backup_common(BackupCommon *backup, ++ BlockDriverState *bs, ++ BlockDriverState *target_bs, ++ AioContext *aio_context, ++ JobTxn *txn, Error **errp) + { +- BlockDriverState *bs; +- BlockDriverState *target_bs; +- BlockDriverState *source = NULL; + BlockJob *job = NULL; + BdrvDirtyBitmap *bmap = NULL; +- AioContext *aio_context; +- QDict *options = NULL; +- Error *local_err = NULL; +- int flags, job_flags = JOB_DEFAULT; +- int64_t size; +- bool set_backing_hd = false; ++ int job_flags = JOB_DEFAULT; + int ret; + + if (!backup->has_speed) { +@@ -3494,9 +3490,6 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + if (!backup->has_on_target_error) { + backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } +- if (!backup->has_mode) { +- backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; +- } + if (!backup->has_job_id) { + backup->job_id = NULL; + } +@@ -3510,6 +3503,54 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + backup->compress = false; + } + ++ ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); ++ if (ret < 0) { ++ return NULL; ++ } ++ ++ if (backup->has_bitmap) { ++ bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); ++ if (!bmap) { ++ error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); ++ return NULL; ++ } ++ if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { ++ return NULL; 
++ } ++ } ++ ++ if (!backup->auto_finalize) { ++ job_flags |= JOB_MANUAL_FINALIZE; ++ } ++ if (!backup->auto_dismiss) { ++ job_flags |= JOB_MANUAL_DISMISS; ++ } ++ ++ job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, ++ backup->sync, bmap, backup->compress, ++ backup->on_source_error, backup->on_target_error, ++ job_flags, NULL, NULL, txn, errp); ++ return job; ++} ++ ++static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BlockDriverState *target_bs; ++ BlockDriverState *source = NULL; ++ BlockJob *job = NULL; ++ AioContext *aio_context; ++ QDict *options = NULL; ++ Error *local_err = NULL; ++ int flags; ++ int64_t size; ++ bool set_backing_hd = false; ++ ++ if (!backup->has_mode) { ++ backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; ++ } ++ + bs = bdrv_lookup_bs(backup->device, backup->device, errp); + if (!bs) { + return NULL; +@@ -3585,12 +3626,6 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + goto out; + } + +- ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); +- if (ret < 0) { +- bdrv_unref(target_bs); +- goto out; +- } +- + if (set_backing_hd) { + bdrv_set_backing_hd(target_bs, source, &local_err); + if (local_err) { +@@ -3598,31 +3633,8 @@ static BlockJob *do_drive_backup(DriveBackup *backup, JobTxn *txn, + } + } + +- if (backup->has_bitmap) { +- bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); +- if (!bmap) { +- error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); +- goto unref; +- } +- if (bdrv_dirty_bitmap_check(bmap, BDRV_BITMAP_DEFAULT, errp)) { +- goto unref; +- } +- } +- if (!backup->auto_finalize) { +- job_flags |= JOB_MANUAL_FINALIZE; +- } +- if (!backup->auto_dismiss) { +- job_flags |= JOB_MANUAL_DISMISS; +- } +- +- job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, +- backup->sync, bmap, backup->compress, +- backup->on_source_error, backup->on_target_error, +- job_flags, NULL, NULL, txn, 
&local_err); +- if (local_err != NULL) { +- error_propagate(errp, local_err); +- goto unref; +- } ++ job = do_backup_common(qapi_DriveBackup_base(backup), ++ bs, target_bs, aio_context, txn, errp); + + unref: + bdrv_unref(target_bs); +-- +2.27.0 + diff --git a/elf2dmp-Fix-memory-leak-on-main-error-paths.patch b/elf2dmp-Fix-memory-leak-on-main-error-paths.patch new file mode 100644 index 0000000000000000000000000000000000000000..219cec31e799c1f32912a717982740f08c70a3c0 --- /dev/null +++ b/elf2dmp-Fix-memory-leak-on-main-error-paths.patch @@ -0,0 +1,41 @@ +From 1f63f8c20a4cb7b752981ef07b2614bbea828b30 Mon Sep 17 00:00:00 2001 +From: AlexChen +Date: Wed, 26 Aug 2020 18:15:53 +0800 +Subject: [PATCH] elf2dmp: Fix memory leak on main() error paths + +The 'kdgb' is allocating memory in get_kdbg(), but it is not freed +in both fill_header() and fill_context() failed branches, fix it. + +Signed-off-by: AlexChen +Reviewed-by: Li Qiang +Reviewed-by: Viktor Prutyanov +Reviewed-by: Thomas Huth +Message-Id: <5F463659.8080101@huawei.com> +Signed-off-by: Laurent Vivier +(cherry-picked from commit 885538fdc9) +--- + contrib/elf2dmp/main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c +index 9a2dbc2902..ac746e49e0 100644 +--- a/contrib/elf2dmp/main.c ++++ b/contrib/elf2dmp/main.c +@@ -568,12 +568,12 @@ int main(int argc, char *argv[]) + if (fill_header(&header, &ps, &vs, KdDebuggerDataBlock, kdbg, + KdVersionBlock, qemu_elf.state_nr)) { + err = 1; +- goto out_pdb; ++ goto out_kdbg; + } + + if (fill_context(kdbg, &vs, &qemu_elf)) { + err = 1; +- goto out_pdb; ++ goto out_kdbg; + } + + if (write_dump(&ps, &header, argv[2])) { +-- +2.27.0 + diff --git a/es1370-check-total-frame-count-against-current-frame.patch b/es1370-check-total-frame-count-against-current-frame.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb1e7a7cdfa6f8046b6aa2ebb270b557dcae14a5 --- /dev/null +++ 
b/es1370-check-total-frame-count-against-current-frame.patch @@ -0,0 +1,60 @@ +From 22bbf1a90ac11fe30e1665c09f9ad904683b6ddc Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Fri, 15 May 2020 01:36:08 +0530 +Subject: [PATCH 1/9] es1370: check total frame count against current frame + +A guest user may set channel frame count via es1370_write() +such that, in es1370_transfer_audio(), total frame count +'size' is lesser than the number of frames that are processed +'cnt'. + + int cnt = d->frame_cnt >> 16; + int size = d->frame_cnt & 0xffff; + +if (size < cnt), it results in incorrect calculations leading +to OOB access issue(s). Add check to avoid it. + +Reported-by: Ren Ding +Reported-by: Hanqing Zhao +Signed-off-by: Prasad J Pandit +Message-id: 20200514200608.1744203-1-ppandit@redhat.com +Signed-off-by: Gerd Hoffmann +--- + hw/audio/es1370.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c +index 260c142b70..eff7d03ae1 100644 +--- a/hw/audio/es1370.c ++++ b/hw/audio/es1370.c +@@ -643,6 +643,9 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, + int csc_bytes = (csc + 1) << d->shift; + int cnt = d->frame_cnt >> 16; + int size = d->frame_cnt & 0xffff; ++ if (size < cnt) { ++ return; ++ } + int left = ((size - cnt + 1) << 2) + d->leftover; + int transferred = 0; + int temp = audio_MIN (max, audio_MIN (left, csc_bytes)); +@@ -651,7 +654,7 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, + addr += (cnt << 2) + d->leftover; + + if (index == ADC_CHANNEL) { +- while (temp) { ++ while (temp > 0) { + int acquired, to_copy; + + to_copy = audio_MIN ((size_t) temp, sizeof (tmpbuf)); +@@ -669,7 +672,7 @@ static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, + else { + SWVoiceOut *voice = s->dac_voice[index]; + +- while (temp) { ++ while (temp > 0) { + int copied, to_copy; + + to_copy = audio_MIN ((size_t) temp, 
sizeof (tmpbuf)); +-- +2.25.1 + diff --git a/exec-set-map-length-to-zero-when-returning-NULL.patch b/exec-set-map-length-to-zero-when-returning-NULL.patch new file mode 100644 index 0000000000000000000000000000000000000000..64c918e8d9de6eb3dd357c955d75488ff5f11c48 --- /dev/null +++ b/exec-set-map-length-to-zero-when-returning-NULL.patch @@ -0,0 +1,54 @@ +From a1a9d6f908b21878daa7868313243c30b7a90fcf Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Tue, 26 May 2020 16:47:43 +0530 +Subject: [PATCH 2/9] exec: set map length to zero when returning NULL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When mapping physical memory into host's virtual address space, +'address_space_map' may return NULL if BounceBuffer is in_use. +Set and return '*plen = 0' to avoid later NULL pointer dereference. + +Reported-by: Alexander Bulekov +Fixes: https://bugs.launchpad.net/qemu/+bug/1878259 +Suggested-by: Paolo Bonzini +Suggested-by: Peter Maydell +Signed-off-by: Prasad J Pandit +Message-Id: <20200526111743.428367-1-ppandit@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +--- + exec.c | 1 + + include/exec/memory.h | 3 ++- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/exec.c b/exec.c +index 3e78de3b8f..85c6d80353 100644 +--- a/exec.c ++++ b/exec.c +@@ -3739,6 +3739,7 @@ void *address_space_map(AddressSpace *as, + if (!memory_access_is_direct(mr, is_write)) { + if (atomic_xchg(&bounce.in_use, true)) { + rcu_read_unlock(); ++ *plen = 0; + return NULL; + } + /* Avoid unbounded allocations */ +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 611a89122d..dca8184277 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -2064,7 +2064,8 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, hwaddr len, + /* address_space_map: map a physical memory region into a host virtual address + * + * May map a subset of the requested range, given by and 
returned in @plen. +- * May return %NULL if resources needed to perform the mapping are exhausted. ++ * May return %NULL and set *@plen to zero(0), if resources needed to perform ++ * the mapping are exhausted. + * Use only for reads OR writes - not for read-modify-write operations. + * Use cpu_register_map_client() to know when retrying the map operation is + * likely to succeed. +-- +2.25.1 + diff --git a/file-posix-Fix-leaked-fd-in-raw_open_common-error-pa.patch b/file-posix-Fix-leaked-fd-in-raw_open_common-error-pa.patch new file mode 100644 index 0000000000000000000000000000000000000000..28c1e3bc6837063888bb8c862fb1e629f70de8be --- /dev/null +++ b/file-posix-Fix-leaked-fd-in-raw_open_common-error-pa.patch @@ -0,0 +1,31 @@ +From 94be73a20d42482cdf30115e672c36af2fe9068d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 17 Jul 2020 12:54:26 +0200 +Subject: [PATCH 5/5] file-posix: Fix leaked fd in raw_open_common() error path + +Signed-off-by: Kevin Wolf +Message-Id: <20200717105426.51134-4-kwolf@redhat.com> +Reviewed-by: Max Reitz +Signed-off-by: Kevin Wolf +Signed-off-by: Zhenyu Ye +--- + block/file-posix.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 2184aa98..1259bf58 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -671,6 +671,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; + ret = 0; + fail: ++ if (ret < 0 && s->fd != -1) { ++ qemu_close(s->fd); ++ } + if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { + unlink(filename); + } +-- +2.22.0.windows.1 + diff --git a/fix-vhost_user_blk_watch-crash.patch b/fix-vhost_user_blk_watch-crash.patch new file mode 100644 index 0000000000000000000000000000000000000000..905cbe3c2542b7d59f8d69da720bf0639a4be9bb --- /dev/null +++ b/fix-vhost_user_blk_watch-crash.patch @@ -0,0 +1,81 @@ +From 0b77995819a596f96c621697643e83624126e668 Mon Sep 17 00:00:00 2001 
+From: Li Feng +Date: Mon, 23 Mar 2020 13:29:24 +0800 +Subject: [PATCH 13/14] fix vhost_user_blk_watch crash + +the G_IO_HUP is watched in tcp_chr_connect, and the callback +vhost_user_blk_watch is not needed, because tcp_chr_hup is registered as +callback. And it will close the tcp link. + +Signed-off-by: Li Feng +Message-Id: <20200323052924.29286-1-fengli@smartx.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Peng Liang +--- + hw/block/vhost-user-blk.c | 19 ------------------- + include/hw/virtio/vhost-user-blk.h | 1 - + 2 files changed, 20 deletions(-) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index 85bc4017e7e9..dc66f8a5febd 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -346,18 +346,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev) + vhost_dev_cleanup(&s->dev); + } + +-static gboolean vhost_user_blk_watch(GIOChannel *chan, GIOCondition cond, +- void *opaque) +-{ +- DeviceState *dev = opaque; +- VirtIODevice *vdev = VIRTIO_DEVICE(dev); +- VHostUserBlk *s = VHOST_USER_BLK(vdev); +- +- qemu_chr_fe_disconnect(&s->chardev); +- +- return true; +-} +- + static void vhost_user_blk_event(void *opaque, int event) + { + DeviceState *dev = opaque; +@@ -370,15 +358,9 @@ static void vhost_user_blk_event(void *opaque, int event) + qemu_chr_fe_disconnect(&s->chardev); + return; + } +- s->watch = qemu_chr_fe_add_watch(&s->chardev, G_IO_HUP, +- vhost_user_blk_watch, dev); + break; + case CHR_EVENT_CLOSED: + vhost_user_blk_disconnect(dev); +- if (s->watch) { +- g_source_remove(s->watch); +- s->watch = 0; +- } + break; + } + } +@@ -419,7 +401,6 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + + s->inflight = g_new0(struct vhost_inflight, 1); + s->vqs = g_new(struct vhost_virtqueue, s->num_queues); +- s->watch = 0; + s->connected = false; + + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, +diff --git 
a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h +index 8dbf11c6f071..ad9b742a644c 100644 +--- a/include/hw/virtio/vhost-user-blk.h ++++ b/include/hw/virtio/vhost-user-blk.h +@@ -38,7 +38,6 @@ typedef struct VHostUserBlk { + struct vhost_inflight *inflight; + VhostUserState vhost_user; + struct vhost_virtqueue *vqs; +- guint watch; + bool connected; + } VHostUserBlk; + +-- +2.26.2 + diff --git a/hbitmap-handle-set-reset-with-zero-length.patch b/hbitmap-handle-set-reset-with-zero-length.patch new file mode 100644 index 0000000000000000000000000000000000000000..b346a970d8594e2fae6a730c8c66370dc66af0da --- /dev/null +++ b/hbitmap-handle-set-reset-with-zero-length.patch @@ -0,0 +1,50 @@ +From c0b35d87de345bd3b59a44c604b247a0497f2fc0 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Fri, 11 Oct 2019 12:07:07 +0300 +Subject: [PATCH] hbitmap: handle set/reset with zero length + +Passing zero length to these functions leads to unpredicted results. +Zero-length set/reset may occur in active-mirror, on zero-length write +(which is unlikely, but not guaranteed to never happen). + +Let's just do nothing on zero-length request. 
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-id: 20191011090711.19940-2-vsementsov@virtuozzo.com +Reviewed-by: Max Reitz +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +(cherry picked from commit fed33bd175f663cc8c13f8a490a4f35a19756cfe) +Signed-off-by: Michael Roth +--- + util/hbitmap.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/util/hbitmap.c b/util/hbitmap.c +index 71c6ba2c52..c059313b9e 100644 +--- a/util/hbitmap.c ++++ b/util/hbitmap.c +@@ -387,6 +387,10 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) + uint64_t first, n; + uint64_t last = start + count - 1; + ++ if (count == 0) { ++ return; ++ } ++ + trace_hbitmap_set(hb, start, count, + start >> hb->granularity, last >> hb->granularity); + +@@ -478,6 +482,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) + uint64_t last = start + count - 1; + uint64_t gran = 1ULL << hb->granularity; + ++ if (count == 0) { ++ return; ++ } ++ + assert(QEMU_IS_ALIGNED(start, gran)); + assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size)); + +-- +2.23.0 diff --git a/hmp-vnc-Fix-info-vnc-list-leak.patch b/hmp-vnc-Fix-info-vnc-list-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..ccc4e1db511a18c5da864a1d8b2732e9a4cd8a1f --- /dev/null +++ b/hmp-vnc-Fix-info-vnc-list-leak.patch @@ -0,0 +1,48 @@ +From 6cb599f75b7844aefd7823ad97fc3bae70eff11f Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 23 Mar 2020 12:08:22 +0000 +Subject: [PATCH 06/14] hmp/vnc: Fix info vnc list leak + +We're iterating the list, and then freeing the iteration pointer rather +than the list head. + +Fixes: 0a9667ecdb6d ("hmp: Update info vnc") +Reported-by: Coverity (CID 1421932) +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20200323120822.51266-1-dgilbert@redhat.com> +Reviewed-by: Peter Maydell +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Peng Liang +--- + monitor/hmp-cmds.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 5ca3ebe94272..fc5d6b92c4b6 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -745,10 +745,11 @@ static void hmp_info_vnc_servers(Monitor *mon, VncServerInfo2List *server) + + void hmp_info_vnc(Monitor *mon, const QDict *qdict) + { +- VncInfo2List *info2l; ++ VncInfo2List *info2l, *info2l_head; + Error *err = NULL; + + info2l = qmp_query_vnc_servers(&err); ++ info2l_head = info2l; + if (err) { + hmp_handle_error(mon, &err); + return; +@@ -777,7 +778,7 @@ void hmp_info_vnc(Monitor *mon, const QDict *qdict) + info2l = info2l->next; + } + +- qapi_free_VncInfo2List(info2l); ++ qapi_free_VncInfo2List(info2l_head); + + } + #endif +-- +2.26.2 + diff --git a/hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch b/hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch new file mode 100644 index 0000000000000000000000000000000000000000..0cbda2bf538fdf6eccab11faa319bc9a31be12f9 --- /dev/null +++ b/hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch @@ -0,0 +1,61 @@ +From f14505f7f91edbce738202a6f658806d1074116c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 11 Dec 2020 17:28:39 +0800 +Subject: [PATCH] hostmem: Fix up free host_nodes list right after visited + +In host_memory_backend_get_host_nodes, we build host_nodes +list and output it to v (a StringOutputVisitor) but forget +to free the list. This fixes the memory leak. 
+ +The memory leak stack: + +Direct leak of 32 byte(s) in 2 object(s) allocated from: + #0 0xfffda30b3393 in __interceptor_calloc (/usr/lib64/libasan.so.4+0xd3393) + #1 0xfffda1d28b9b in g_malloc0 (/usr/lib64/libglib-2.0.so.0+0x58b9b) + #2 0xaaab05ca6e43 in host_memory_backend_get_host_nodes backends/hostmem.c:94 + #3 0xaaab061ddf83 in object_property_get_uint16List qom/object.c:1478 + #4 0xaaab05866513 in query_memdev hw/core/machine-qmp-cmds.c:312 + #5 0xaaab061d980b in do_object_child_foreach qom/object.c:1001 + #6 0xaaab0586779b in qmp_query_memdev hw/core/machine-qmp-cmds.c:328 + #7 0xaaab0615ed3f in qmp_marshal_query_memdev qapi/qapi-commands-machine.c:327 + #8 0xaaab0632d647 in do_qmp_dispatch qapi/qmp-dispatch.c:147 + #9 0xaaab0632d647 in qmp_dispatch qapi/qmp-dispatch.c:190 + #10 0xaaab0610f74b in monitor_qmp_dispatch monitor/qmp.c:120 + #11 0xaaab0611074b in monitor_qmp_bh_dispatcher monitor/qmp.c:209 + #12 0xaaab063caefb in aio_bh_poll util/async.c:117 + #13 0xaaab063d30fb in aio_dispatch util/aio-posix.c:459 + #14 0xaaab063cac8f in aio_ctx_dispatch util/async.c:268 + #15 0xfffda1d22a6b in g_main_context_dispatch (/usr/lib64/libglib-2.0.so.0+0x52a6b) + #16 0xaaab063d0e97 in glib_pollfds_poll util/main-loop.c:218 + #17 0xaaab063d0e97 in os_host_main_loop_wait util/main-loop.c:241 + #18 0xaaab063d0e97 in main_loop_wait util/main-loop.c:517 + #19 0xaaab05c8bfa7 in main_loop /root/rpmbuild/BUILD/qemu-4.1.0/vl.c:1791 + #20 0xaaab05713bc3 in main /root/rpmbuild/BUILD/qemu-4.1.0/vl.c:4473 + #21 0xfffda0a83ebf in __libc_start_main (/usr/lib64/libc.so.6+0x23ebf) + #22 0xaaab0571ed5f (aarch64-softmmu/qemu-system-aarch64+0x88ed5f) +SUMMARY: AddressSanitizer: 32 byte(s) leaked in 2 allocation(s). 
+ +Fixes: 4cf1b76bf1e2 (hostmem: add properties for NUMA memory policy) +Reported-by: Euler Robot +Tested-by: Chen Qun +Reviewed-by: Igor Mammedov +Signed-off-by: Keqian Zhu +--- + backends/hostmem.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/backends/hostmem.c b/backends/hostmem.c +index 463102aa15..9e1b3a0afc 100644 +--- a/backends/hostmem.c ++++ b/backends/hostmem.c +@@ -108,6 +108,7 @@ host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name, + + ret: + visit_type_uint16List(v, name, &host_nodes, errp); ++ qapi_free_uint16List(host_nodes); + } + + static void +-- +2.27.0 + diff --git a/hppa-fix-leak-from-g_strdup_printf.patch b/hppa-fix-leak-from-g_strdup_printf.patch new file mode 100644 index 0000000000000000000000000000000000000000..b04193e380fe58ef14e91cb56d162abc264dce9b --- /dev/null +++ b/hppa-fix-leak-from-g_strdup_printf.patch @@ -0,0 +1,54 @@ +From b7ef7e6fb5a2b08268f4b19c07c07abd4fbb2064 Mon Sep 17 00:00:00 2001 +From: lizhengui +Date: Wed, 9 Sep 2020 14:48:49 +0800 +Subject: [PATCH] hppa: fix leak from g_strdup_printf +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +memory_region_init_* takes care of copying the name into memory it owns. +Free it in the caller. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Philippe Mathieu-Daudé +--- + hw/hppa/dino.c | 1 + + hw/hppa/machine.c | 4 +++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c +index e94614ab..ef923b49 100644 +--- a/hw/hppa/dino.c ++++ b/hw/hppa/dino.c +@@ -485,6 +485,7 @@ PCIBus *dino_init(MemoryRegion *addr_space, + memory_region_init_alias(&s->pci_mem_alias[i], OBJECT(s), + name, &s->pci_mem, addr, + DINO_MEM_CHUNK_SIZE); ++ g_free(name); + } + + /* Set up PCI view of memory: Bus master address space. 
*/ +diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c +index 662838d8..9e25660e 100644 +--- a/hw/hppa/machine.c ++++ b/hw/hppa/machine.c +@@ -78,13 +78,15 @@ static void machine_hppa_init(MachineState *machine) + + /* Create CPUs. */ + for (i = 0; i < smp_cpus; i++) { ++ char *name = g_strdup_printf("cpu%ld-io-eir", i); + cpu[i] = HPPA_CPU(cpu_create(machine->cpu_type)); + + cpu_region = g_new(MemoryRegion, 1); + memory_region_init_io(cpu_region, OBJECT(cpu[i]), &hppa_io_eir_ops, +- cpu[i], g_strdup_printf("cpu%ld-io-eir", i), 4); ++ cpu[i], name, 4); + memory_region_add_subregion(addr_space, CPU_HPA + i * 0x1000, + cpu_region); ++ g_free(name); + } + + /* Limit main memory. */ +-- +2.19.1 + diff --git a/hw-acpi-Add-ACPI-Generic-Event-Device-Support.patch b/hw-acpi-Add-ACPI-Generic-Event-Device-Support.patch new file mode 100644 index 0000000000000000000000000000000000000000..dc57aa64dd12427afc1e59af0476206317065d1b --- /dev/null +++ b/hw-acpi-Add-ACPI-Generic-Event-Device-Support.patch @@ -0,0 +1,478 @@ +From b12d9edd0079d4ee136c25e95333918b0c6d3cd9 Mon Sep 17 00:00:00 2001 +From: Samuel Ortiz +Date: Wed, 18 Sep 2019 14:06:25 +0100 +Subject: [PATCH] hw/acpi: Add ACPI Generic Event Device Support + +The ACPI Generic Event Device (GED) is a hardware-reduced specific +device[ACPI v6.1 Section 5.6.9] that handles all platform events, +including the hotplug ones. This patch generates the AML code that +defines GEDs. + +Platforms need to specify their own GED Event bitmap to describe +what kind of events they want to support through GED. Also this +uses a a single interrupt for the GED device, relying on IO +memory region to communicate the type of device affected by the +interrupt. This way, we can support up to 32 events with a unique +interrupt. + +This supports only memory hotplug for now. 
+ +Signed-off-by: Samuel Ortiz +Signed-off-by: Sebastien Boeuf +Signed-off-by: Shameer Kolothum +Reviewed-by: Eric Auger +Message-Id: <20190918130633.4872-4-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Igor Mammedov +--- + hw/acpi/Kconfig | 4 + + hw/acpi/Makefile.objs | 1 + + hw/acpi/generic_event_device.c | 303 +++++++++++++++++++++++++ + include/hw/acpi/generic_event_device.h | 100 ++++++++ + 4 files changed, 408 insertions(+) + create mode 100644 hw/acpi/generic_event_device.c + create mode 100644 include/hw/acpi/generic_event_device.h + +diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig +index 7c59cf900b..12e3f1e86e 100644 +--- a/hw/acpi/Kconfig ++++ b/hw/acpi/Kconfig +@@ -31,3 +31,7 @@ config ACPI_VMGENID + bool + default y + depends on PC ++ ++config ACPI_HW_REDUCED ++ bool ++ depends on ACPI +diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs +index 1a720c381e..e4b5d101a4 100644 +--- a/hw/acpi/Makefile.objs ++++ b/hw/acpi/Makefile.objs +@@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o + common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o + common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o + common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o ++common-obj-$(CONFIG_ACPI_HW_REDUCED) += generic_event_device.o + common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o + + common-obj-y += acpi_interface.o +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +new file mode 100644 +index 0000000000..b94500b08d +--- /dev/null ++++ b/hw/acpi/generic_event_device.c +@@ -0,0 +1,303 @@ ++/* ++ * ++ * Copyright (c) 2018 Intel Corporation ++ * Copyright (c) 2019 Huawei Technologies R & D (UK) Ltd ++ * Written by Samuel Ortiz, Shameer Kolothum ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2 or later, as published 
by the Free Software Foundation. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "exec/address-spaces.h" ++#include "hw/acpi/acpi.h" ++#include "hw/acpi/generic_event_device.h" ++#include "hw/irq.h" ++#include "hw/mem/pc-dimm.h" ++#include "hw/qdev-properties.h" ++#include "migration/vmstate.h" ++#include "qemu/error-report.h" ++ ++static const uint32_t ged_supported_events[] = { ++ ACPI_GED_MEM_HOTPLUG_EVT, ++}; ++ ++/* ++ * The ACPI Generic Event Device (GED) is a hardware-reduced specific ++ * device[ACPI v6.1 Section 5.6.9] that handles all platform events, ++ * including the hotplug ones. Platforms need to specify their own ++ * GED Event bitmap to describe what kind of events they want to support ++ * through GED. This routine uses a single interrupt for the GED device, ++ * relying on IO memory region to communicate the type of device ++ * affected by the interrupt. This way, we can support up to 32 events ++ * with a unique interrupt. ++ */ ++void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, ++ uint32_t ged_irq, AmlRegionSpace rs, hwaddr ged_base) ++{ ++ AcpiGedState *s = ACPI_GED(hotplug_dev); ++ Aml *crs = aml_resource_template(); ++ Aml *evt, *field; ++ Aml *dev = aml_device("%s", name); ++ Aml *evt_sel = aml_local(0); ++ Aml *esel = aml_name(AML_GED_EVT_SEL); ++ ++ /* _CRS interrupt */ ++ aml_append(crs, aml_interrupt(AML_CONSUMER, AML_EDGE, AML_ACTIVE_HIGH, ++ AML_EXCLUSIVE, &ged_irq, 1)); ++ ++ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0013"))); ++ aml_append(dev, aml_name_decl("_UID", aml_string(GED_DEVICE))); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ ++ /* Append IO region */ ++ aml_append(dev, aml_operation_region(AML_GED_EVT_REG, rs, ++ aml_int(ged_base + ACPI_GED_EVT_SEL_OFFSET), ++ ACPI_GED_EVT_SEL_LEN)); ++ field = aml_field(AML_GED_EVT_REG, AML_DWORD_ACC, AML_NOLOCK, ++ AML_WRITE_AS_ZEROS); ++ aml_append(field, aml_named_field(AML_GED_EVT_SEL, ++ ACPI_GED_EVT_SEL_LEN * 
BITS_PER_BYTE)); ++ aml_append(dev, field); ++ ++ /* ++ * For each GED event we: ++ * - Add a conditional block for each event, inside a loop. ++ * - Call a method for each supported GED event type. ++ * ++ * The resulting ASL code looks like: ++ * ++ * Local0 = ESEL ++ * If ((Local0 & One) == One) ++ * { ++ * MethodEvent0() ++ * } ++ * ++ * If ((Local0 & 0x2) == 0x2) ++ * { ++ * MethodEvent1() ++ * } ++ * ... ++ */ ++ evt = aml_method("_EVT", 1, AML_SERIALIZED); ++ { ++ Aml *if_ctx; ++ uint32_t i; ++ uint32_t ged_events = ctpop32(s->ged_event_bitmap); ++ ++ /* Local0 = ESEL */ ++ aml_append(evt, aml_store(esel, evt_sel)); ++ ++ for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) { ++ uint32_t event = s->ged_event_bitmap & ged_supported_events[i]; ++ ++ if (!event) { ++ continue; ++ } ++ ++ if_ctx = aml_if(aml_equal(aml_and(evt_sel, aml_int(event), NULL), ++ aml_int(event))); ++ switch (event) { ++ case ACPI_GED_MEM_HOTPLUG_EVT: ++ aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." ++ MEMORY_SLOT_SCAN_METHOD)); ++ break; ++ default: ++ /* ++ * Please make sure all the events in ged_supported_events[] ++ * are handled above. 
++ */ ++ g_assert_not_reached(); ++ } ++ ++ aml_append(evt, if_ctx); ++ ged_events--; ++ } ++ ++ if (ged_events) { ++ error_report("Unsupported events specified"); ++ abort(); ++ } ++ } ++ ++ /* Append _EVT method */ ++ aml_append(dev, evt); ++ ++ aml_append(table, dev); ++} ++ ++/* Memory read by the GED _EVT AML dynamic method */ ++static uint64_t ged_read(void *opaque, hwaddr addr, unsigned size) ++{ ++ uint64_t val = 0; ++ GEDState *ged_st = opaque; ++ ++ switch (addr) { ++ case ACPI_GED_EVT_SEL_OFFSET: ++ /* Read the selector value and reset it */ ++ val = ged_st->sel; ++ ged_st->sel = 0; ++ break; ++ default: ++ break; ++ } ++ ++ return val; ++} ++ ++/* Nothing is expected to be written to the GED memory region */ ++static void ged_write(void *opaque, hwaddr addr, uint64_t data, ++ unsigned int size) ++{ ++} ++ ++static const MemoryRegionOps ged_ops = { ++ .read = ged_read, ++ .write = ged_write, ++ .endianness = DEVICE_LITTLE_ENDIAN, ++ .valid = { ++ .min_access_size = 4, ++ .max_access_size = 4, ++ }, ++}; ++ ++static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ AcpiGedState *s = ACPI_GED(hotplug_dev); ++ ++ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { ++ acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); ++ } else { ++ error_setg(errp, "virt: device plug request for unsupported device" ++ " type: %s", object_get_typename(OBJECT(dev))); ++ } ++} ++ ++static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) ++{ ++ AcpiGedState *s = ACPI_GED(adev); ++ GEDState *ged_st = &s->ged_state; ++ uint32_t sel; ++ ++ if (ev & ACPI_MEMORY_HOTPLUG_STATUS) { ++ sel = ACPI_GED_MEM_HOTPLUG_EVT; ++ } else { ++ /* Unknown event. Return without generating interrupt. */ ++ warn_report("GED: Unsupported event %d. No irq injected", ev); ++ return; ++ } ++ ++ /* ++ * Set the GED selector field to communicate the event type. 
++ * This will be read by GED aml code to select the appropriate ++ * event method. ++ */ ++ ged_st->sel |= sel; ++ ++ /* Trigger the event by sending an interrupt to the guest. */ ++ qemu_irq_pulse(s->irq); ++} ++ ++static Property acpi_ged_properties[] = { ++ DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static const VMStateDescription vmstate_memhp_state = { ++ .name = "acpi-ged/memhp", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_MEMORY_HOTPLUG(memhp_state, AcpiGedState), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static const VMStateDescription vmstate_ged_state = { ++ .name = "acpi-ged-state", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT32(sel, GEDState), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static const VMStateDescription vmstate_acpi_ged = { ++ .name = "acpi-ged", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_STRUCT(ged_state, AcpiGedState, 1, vmstate_ged_state, GEDState), ++ VMSTATE_END_OF_LIST(), ++ }, ++ .subsections = (const VMStateDescription * []) { ++ &vmstate_memhp_state, ++ NULL ++ } ++}; ++ ++static void acpi_ged_initfn(Object *obj) ++{ ++ DeviceState *dev = DEVICE(obj); ++ AcpiGedState *s = ACPI_GED(dev); ++ SysBusDevice *sbd = SYS_BUS_DEVICE(obj); ++ GEDState *ged_st = &s->ged_state; ++ ++ memory_region_init_io(&ged_st->io, obj, &ged_ops, ged_st, ++ TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); ++ sysbus_init_mmio(sbd, &ged_st->io); ++ ++ sysbus_init_irq(sbd, &s->irq); ++ ++ s->memhp_state.is_enabled = true; ++ /* ++ * GED handles memory hotplug event and acpi-mem-hotplug ++ * memory region gets initialized here. Create an exclusive ++ * container for memory hotplug IO and expose it as GED sysbus ++ * MMIO so that boards can map it separately. 
++ */ ++ memory_region_init(&s->container_memhp, OBJECT(dev), "memhp container", ++ MEMORY_HOTPLUG_IO_LEN); ++ sysbus_init_mmio(sbd, &s->container_memhp); ++ acpi_memory_hotplug_init(&s->container_memhp, OBJECT(dev), ++ &s->memhp_state, 0); ++} ++ ++static void acpi_ged_class_init(ObjectClass *class, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(class); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(class); ++ AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(class); ++ ++ dc->desc = "ACPI Generic Event Device"; ++ dc->props = acpi_ged_properties; ++ dc->vmsd = &vmstate_acpi_ged; ++ ++ hc->plug = acpi_ged_device_plug_cb; ++ ++ adevc->send_event = acpi_ged_send_event; ++} ++ ++static const TypeInfo acpi_ged_info = { ++ .name = TYPE_ACPI_GED, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = sizeof(AcpiGedState), ++ .instance_init = acpi_ged_initfn, ++ .class_init = acpi_ged_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { TYPE_ACPI_DEVICE_IF }, ++ { } ++ } ++}; ++ ++static void acpi_ged_register_types(void) ++{ ++ type_register_static(&acpi_ged_info); ++} ++ ++type_init(acpi_ged_register_types) +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +new file mode 100644 +index 0000000000..2049e8d873 +--- /dev/null ++++ b/include/hw/acpi/generic_event_device.h +@@ -0,0 +1,100 @@ ++/* ++ * ++ * Copyright (c) 2018 Intel Corporation ++ * Copyright (c) 2019 Huawei Technologies R & D (UK) Ltd ++ * Written by Samuel Ortiz, Shameer Kolothum ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2 or later, as published by the Free Software Foundation. ++ * ++ * The ACPI Generic Event Device (GED) is a hardware-reduced specific ++ * device[ACPI v6.1 Section 5.6.9] that handles all platform events, ++ * including the hotplug ones. 
Generic Event Device allows platforms ++ * to handle interrupts in ACPI ASL statements. It follows a very ++ * similar approach like the _EVT method from GPIO events. All ++ * interrupts are listed in _CRS and the handler is written in _EVT ++ * method. Here, we use a single interrupt for the GED device, relying ++ * on IO memory region to communicate the type of device affected by ++ * the interrupt. This way, we can support up to 32 events with a ++ * unique interrupt. ++ * ++ * Here is an example. ++ * ++ * Device (\_SB.GED) ++ * { ++ * Name (_HID, "ACPI0013") ++ * Name (_UID, Zero) ++ * Name (_CRS, ResourceTemplate () ++ * { ++ * Interrupt (ResourceConsumer, Edge, ActiveHigh, Exclusive, ,, ) ++ * { ++ * 0x00000029, ++ * } ++ * }) ++ * OperationRegion (EREG, SystemMemory, 0x09080000, 0x04) ++ * Field (EREG, DWordAcc, NoLock, WriteAsZeros) ++ * { ++ * ESEL, 32 ++ * } ++ * ++ * Method (_EVT, 1, Serialized) // _EVT: Event ++ * { ++ * Local0 = ESEL // ESEL = IO memory region which specifies the ++ * // device type. ++ * If (((Local0 & One) == One)) ++ * { ++ * MethodEvent1() ++ * } ++ * If ((Local0 & 0x2) == 0x2) ++ * { ++ * MethodEvent2() ++ * } ++ * ... ++ * } ++ * } ++ * ++ */ ++ ++#ifndef HW_ACPI_GED_H ++#define HW_ACPI_GED_H ++ ++#include "hw/sysbus.h" ++#include "hw/acpi/memory_hotplug.h" ++ ++#define TYPE_ACPI_GED "acpi-ged" ++#define ACPI_GED(obj) \ ++ OBJECT_CHECK(AcpiGedState, (obj), TYPE_ACPI_GED) ++ ++#define ACPI_GED_EVT_SEL_OFFSET 0x0 ++#define ACPI_GED_EVT_SEL_LEN 0x4 ++ ++#define GED_DEVICE "GED" ++#define AML_GED_EVT_REG "EREG" ++#define AML_GED_EVT_SEL "ESEL" ++ ++/* ++ * Platforms need to specify the GED event bitmap ++ * to describe what kind of events they want to support ++ * through GED. 
++ */ ++#define ACPI_GED_MEM_HOTPLUG_EVT 0x1 ++ ++typedef struct GEDState { ++ MemoryRegion io; ++ uint32_t sel; ++} GEDState; ++ ++typedef struct AcpiGedState { ++ SysBusDevice parent_obj; ++ MemHotplugState memhp_state; ++ MemoryRegion container_memhp; ++ GEDState ged_state; ++ uint32_t ged_event_bitmap; ++ qemu_irq irq; ++} AcpiGedState; ++ ++void build_ged_aml(Aml *table, const char* name, HotplugHandler *hotplug_dev, ++ uint32_t ged_irq, AmlRegionSpace rs, hwaddr ged_base); ++ ++#endif +-- +2.19.1 diff --git a/hw-acpi-Do-not-create-memory-hotplug-method-when-han.patch b/hw-acpi-Do-not-create-memory-hotplug-method-when-han.patch new file mode 100644 index 0000000000000000000000000000000000000000..292687199fbe797b86b25ac2fb56063f7080a0f0 --- /dev/null +++ b/hw-acpi-Do-not-create-memory-hotplug-method-when-han.patch @@ -0,0 +1,46 @@ +From 7a08983315bf4d624966a89112259e2b4949de91 Mon Sep 17 00:00:00 2001 +From: Samuel Ortiz +Date: Wed, 18 Sep 2019 14:06:24 +0100 +Subject: [PATCH] hw/acpi: Do not create memory hotplug method when handler is + not defined + +With Hardware-reduced ACPI, the GED device will manage ACPI +hotplug entirely. As a consequence, make the memory specific +events AML generation optional. The code will only be added +when the method name is not NULL. + +Signed-off-by: Samuel Ortiz +Signed-off-by: Shameer Kolothum +Reviewed-by: Eric Auger +Reviewed-by: Igor Mammedov +Message-Id: <20190918130633.4872-3-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +--- + hw/acpi/memory_hotplug.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c +index 9a515c0484..8b30356c1a 100644 +--- a/hw/acpi/memory_hotplug.c ++++ b/hw/acpi/memory_hotplug.c +@@ -711,10 +711,12 @@ void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, + } + aml_append(table, dev_container); + +- method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); +- aml_append(method, +- aml_call0(MEMORY_DEVICES_CONTAINER "." MEMORY_SLOT_SCAN_METHOD)); +- aml_append(table, method); ++ if (event_handler_method) { ++ method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); ++ aml_append(method, aml_call0(MEMORY_DEVICES_CONTAINER "." ++ MEMORY_SLOT_SCAN_METHOD)); ++ aml_append(table, method); ++ } + + g_free(mhp_res_path); + } +-- +2.19.1 diff --git a/hw-acpi-Make-ACPI-IO-address-space-configurable.patch b/hw-acpi-Make-ACPI-IO-address-space-configurable.patch new file mode 100644 index 0000000000000000000000000000000000000000..cdf597b51566400b17ddac3e27dc93cb65a61bd6 --- /dev/null +++ b/hw-acpi-Make-ACPI-IO-address-space-configurable.patch @@ -0,0 +1,196 @@ +From 6cd7281c73ca462b2f27969f1e28f1afd3ebe82d Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 18 Sep 2019 14:06:23 +0100 +Subject: [PATCH] hw/acpi: Make ACPI IO address space configurable + +This is in preparation for adding support for ARM64 platforms +where it doesn't use port mapped IO for ACPI IO space. We are +making changes so that MMIO region can be accommodated +and board can pass the base address into the aml build function. + +Also move few MEMORY_* definitions to header so that other memory +hotplug event signalling mechanisms (eg. Generic Event Device on +HW-reduced acpi platforms) can use the same from their respective +event handler code. 
+ +Signed-off-by: Shameer Kolothum +Reviewed-by: Eric Auger +Reviewed-by: Igor Mammedov +Message-Id: <20190918130633.4872-2-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + hw/acpi/memory_hotplug.c | 33 ++++++++++++++------------------ + hw/i386/acpi-build.c | 7 ++++++- + hw/i386/pc.c | 3 +++ + include/hw/acpi/memory_hotplug.h | 9 +++++++-- + include/hw/i386/pc.h | 3 +++ + 5 files changed, 33 insertions(+), 22 deletions(-) + +diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c +index 297812d5f7..9a515c0484 100644 +--- a/hw/acpi/memory_hotplug.c ++++ b/hw/acpi/memory_hotplug.c +@@ -29,12 +29,7 @@ + #define MEMORY_SLOT_PROXIMITY_METHOD "MPXM" + #define MEMORY_SLOT_EJECT_METHOD "MEJ0" + #define MEMORY_SLOT_NOTIFY_METHOD "MTFY" +-#define MEMORY_SLOT_SCAN_METHOD "MSCN" + #define MEMORY_HOTPLUG_DEVICE "MHPD" +-#define MEMORY_HOTPLUG_IO_LEN 24 +-#define MEMORY_DEVICES_CONTAINER "\\_SB.MHPC" +- +-static uint16_t memhp_io_base; + + static ACPIOSTInfo *acpi_memory_device_status(int slot, MemStatus *mdev) + { +@@ -209,7 +204,7 @@ static const MemoryRegionOps acpi_memory_hotplug_ops = { + }; + + void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, +- MemHotplugState *state, uint16_t io_base) ++ MemHotplugState *state, hwaddr io_base) + { + MachineState *machine = MACHINE(qdev_get_machine()); + +@@ -218,12 +213,10 @@ void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, + return; + } + +- assert(!memhp_io_base); +- memhp_io_base = io_base; + state->devs = g_malloc0(sizeof(*state->devs) * state->dev_count); + memory_region_init_io(&state->io, owner, &acpi_memory_hotplug_ops, state, + "acpi-mem-hotplug", MEMORY_HOTPLUG_IO_LEN); +- memory_region_add_subregion(as, memhp_io_base, &state->io); ++ memory_region_add_subregion(as, io_base, &state->io); + } + + /** +@@ -342,7 +335,8 @@ const VMStateDescription vmstate_memory_hotplug = { + + void 
build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, + const char *res_root, +- const char *event_handler_method) ++ const char *event_handler_method, ++ AmlRegionSpace rs, hwaddr memhp_io_base) + { + int i; + Aml *ifctx; +@@ -351,10 +345,6 @@ void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, + Aml *mem_ctrl_dev; + char *mhp_res_path; + +- if (!memhp_io_base) { +- return; +- } +- + mhp_res_path = g_strdup_printf("%s." MEMORY_HOTPLUG_DEVICE, res_root); + mem_ctrl_dev = aml_device("%s", mhp_res_path); + { +@@ -365,14 +355,19 @@ void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, + aml_name_decl("_UID", aml_string("Memory hotplug resources"))); + + crs = aml_resource_template(); +- aml_append(crs, +- aml_io(AML_DECODE16, memhp_io_base, memhp_io_base, 0, +- MEMORY_HOTPLUG_IO_LEN) +- ); ++ if (rs == AML_SYSTEM_IO) { ++ aml_append(crs, ++ aml_io(AML_DECODE16, memhp_io_base, memhp_io_base, 0, ++ MEMORY_HOTPLUG_IO_LEN) ++ ); ++ } else { ++ aml_append(crs, aml_memory32_fixed(memhp_io_base, ++ MEMORY_HOTPLUG_IO_LEN, AML_READ_WRITE)); ++ } + aml_append(mem_ctrl_dev, aml_name_decl("_CRS", crs)); + + aml_append(mem_ctrl_dev, aml_operation_region( +- MEMORY_HOTPLUG_IO_REGION, AML_SYSTEM_IO, ++ MEMORY_HOTPLUG_IO_REGION, rs, + aml_int(memhp_io_base), MEMORY_HOTPLUG_IO_LEN) + ); + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index f3fdfefcd5..749218561a 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1871,7 +1871,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + build_cpus_aml(dsdt, machine, opts, pm->cpu_hp_io_base, + "\\_SB.PCI0", "\\_GPE._E02"); + } +- build_memory_hotplug_aml(dsdt, nr_mem, "\\_SB.PCI0", "\\_GPE._E03"); ++ ++ if (pcms->memhp_io_base && nr_mem) { ++ build_memory_hotplug_aml(dsdt, nr_mem, "\\_SB.PCI0", ++ "\\_GPE._E03", AML_SYSTEM_IO, ++ pcms->memhp_io_base); ++ } + + scope = aml_scope("_GPE"); + { +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index d011733ff7..8a914130b0 100644 +--- a/hw/i386/pc.c ++++ 
b/hw/i386/pc.c +@@ -1936,6 +1936,9 @@ void pc_memory_init(PCMachineState *pcms, + + /* Init default IOAPIC address space */ + pcms->ioapic_as = &address_space_memory; ++ ++ /* Init ACPI memory hotplug IO base address */ ++ pcms->memhp_io_base = ACPI_MEMORY_HOTPLUG_BASE; + } + + /* +diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h +index 77c65765d6..dfe9cf3fde 100644 +--- a/include/hw/acpi/memory_hotplug.h ++++ b/include/hw/acpi/memory_hotplug.h +@@ -5,6 +5,10 @@ + #include "hw/acpi/acpi.h" + #include "hw/acpi/aml-build.h" + ++#define MEMORY_SLOT_SCAN_METHOD "MSCN" ++#define MEMORY_DEVICES_CONTAINER "\\_SB.MHPC" ++#define MEMORY_HOTPLUG_IO_LEN 24 ++ + /** + * MemStatus: + * @is_removing: the memory device in slot has been requested to be ejected. +@@ -29,7 +33,7 @@ typedef struct MemHotplugState { + } MemHotplugState; + + void acpi_memory_hotplug_init(MemoryRegion *as, Object *owner, +- MemHotplugState *state, uint16_t io_base); ++ MemHotplugState *state, hwaddr io_base); + + void acpi_memory_plug_cb(HotplugHandler *hotplug_dev, MemHotplugState *mem_st, + DeviceState *dev, Error **errp); +@@ -48,5 +52,6 @@ void acpi_memory_ospm_status(MemHotplugState *mem_st, ACPIOSTInfoList ***list); + + void build_memory_hotplug_aml(Aml *table, uint32_t nr_mem, + const char *res_root, +- const char *event_handler_method); ++ const char *event_handler_method, ++ AmlRegionSpace rs, hwaddr memhp_io_base); + #endif +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 859b64c51d..49b47535cf 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -69,6 +69,9 @@ struct PCMachineState { + /* Address space used by IOAPIC device. All IOAPIC interrupts + * will be translated to MSI messages in the address space. 
*/ + AddressSpace *ioapic_as; ++ ++ /* ACPI Memory hotplug IO base address */ ++ hwaddr memhp_io_base; + }; + + #define PC_MACHINE_ACPI_DEVICE_PROP "acpi-device" +-- +2.19.1 diff --git a/hw-arm-Factor-out-powerdown-notifier-from-GPIO.patch b/hw-arm-Factor-out-powerdown-notifier-from-GPIO.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2c0f3900d05652a977be13df0babb8210c65b1b --- /dev/null +++ b/hw-arm-Factor-out-powerdown-notifier-from-GPIO.patch @@ -0,0 +1,72 @@ +From e6b1fd7bfbfe116e9d5df590f7069336c1eb1983 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 18 Sep 2019 14:06:29 +0100 +Subject: [PATCH] hw/arm: Factor out powerdown notifier from GPIO + +This is in preparation of using GED device for +system_powerdown event. Make the powerdown notifier +registration independent of create_gpio() fn. + +Signed-off-by: Shameer Kolothum +Reviewed-by: Eric Auger +Reviewed-by: Igor Mammedov +Message-Id: <20190918130633.4872-8-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +--- + hw/arm/virt.c | 12 ++++-------- + include/hw/arm/virt.h | 1 + + 2 files changed, 5 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ab33cce4b3..aaefa5578e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -910,10 +910,6 @@ static void virt_powerdown_req(Notifier *n, void *opaque) + qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1); + } + +-static Notifier virt_system_powerdown_notifier = { +- .notify = virt_powerdown_req +-}; +- + static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) + { + char *nodename; +@@ -954,10 +950,6 @@ static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) + KEY_POWER); + qemu_fdt_setprop_cells(vms->fdt, "/gpio-keys/poweroff", + "gpios", phandle, 3, 0); +- +- /* connect powerdown request */ +- qemu_register_powerdown_notifier(&virt_system_powerdown_notifier); +- + g_free(nodename); + } + +@@ -1856,6 +1848,10 @@ static void machvirt_init(MachineState *machine) + vms->acpi_dev = create_acpi_ged(vms, pic); + } + ++ /* connect powerdown request */ ++ vms->powerdown_notifier.notify = virt_powerdown_req; ++ qemu_register_powerdown_notifier(&vms->powerdown_notifier); ++ + /* Create mmio transports, so the user can create virtio backends + * (which will be automatically plugged in to the transports). If + * no backend is created the transport will just sit harmlessly idle. +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 0350285136..dcceb9c615 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -139,6 +139,7 @@ typedef struct { + int psci_conduit; + hwaddr highest_gpa; + DeviceState *acpi_dev; ++ Notifier powerdown_notifier; + } VirtMachineState; + + #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +-- +2.19.1 diff --git a/hw-arm-Use-GED-for-system_powerdown-event.patch b/hw-arm-Use-GED-for-system_powerdown-event.patch new file mode 100644 index 0000000000000000000000000000000000000000..140f59a2c71da40f47a2edd2bd2dd529227c3ddc --- /dev/null +++ b/hw-arm-Use-GED-for-system_powerdown-event.patch @@ -0,0 +1,167 @@ +From 0b77f242b180f1ae40b9752999cef4894113df8e Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 18 Sep 2019 14:06:30 +0100 +Subject: [PATCH] hw/arm: Use GED for system_powerdown event + +For machines 4.2 or higher with ACPI boot use GED for system_powerdown +event instead of GPIO. Guest boot with DT still uses GPIO. + +Signed-off-by: Shameer Kolothum +Reviewed-by: Eric Auger +Reviewed-by: Igor Mammedov +Message-Id: <20190918130633.4872-9-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + hw/acpi/generic_event_device.c | 8 ++++++++ + hw/arm/virt-acpi-build.c | 6 +++--- + hw/arm/virt.c | 18 ++++++++++++------ + include/hw/acpi/acpi_dev_interface.h | 1 + + include/hw/acpi/generic_event_device.h | 3 +++ + 5 files changed, 27 insertions(+), 9 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index b94500b08d..9cee90cc70 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -22,6 +22,7 @@ + + static const uint32_t ged_supported_events[] = { + ACPI_GED_MEM_HOTPLUG_EVT, ++ ACPI_GED_PWR_DOWN_EVT, + }; + + /* +@@ -104,6 +105,11 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, + aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." 
+ MEMORY_SLOT_SCAN_METHOD)); + break; ++ case ACPI_GED_PWR_DOWN_EVT: ++ aml_append(if_ctx, ++ aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), ++ aml_int(0x80))); ++ break; + default: + /* + * Please make sure all the events in ged_supported_events[] +@@ -184,6 +190,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + if (ev & ACPI_MEMORY_HOTPLUG_STATUS) { + sel = ACPI_GED_MEM_HOTPLUG_EVT; ++ } else if (ev & ACPI_POWER_DOWN_STATUS) { ++ sel = ACPI_GED_PWR_DOWN_EVT; + } else { + /* Unknown event. Return without generating interrupt. */ + warn_report("GED: Unsupported event %d. No irq injected", ev); +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 9622994e50..f48733d9f2 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -50,7 +50,6 @@ + #include "hw/acpi/acpi-defs.h" + + #define ARM_SPI_BASE 32 +-#define ACPI_POWER_BUTTON_DEVICE "PWRB" + + static void acpi_dsdt_add_psd(Aml *dev, int cpus) + { +@@ -813,13 +812,14 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + (irqmap[VIRT_MMIO] + ARM_SPI_BASE), NUM_VIRTIO_TRANSPORTS); + acpi_dsdt_add_pci(scope, memmap, (irqmap[VIRT_PCIE] + ARM_SPI_BASE), + vms->highmem, vms->highmem_ecam); +- acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO], +- (irqmap[VIRT_GPIO] + ARM_SPI_BASE)); + if (vms->acpi_dev) { + build_ged_aml(scope, "\\_SB."GED_DEVICE, + HOTPLUG_HANDLER(vms->acpi_dev), + irqmap[VIRT_ACPI_GED] + ARM_SPI_BASE, AML_SYSTEM_MEMORY, + memmap[VIRT_ACPI_GED].base); ++ } else { ++ acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO], ++ (irqmap[VIRT_GPIO] + ARM_SPI_BASE)); + } + + if (vms->acpi_dev) { +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index aaefa5578e..18321e522b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -639,10 +639,10 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) + DeviceState *dev; + MachineState *ms = MACHINE(vms); + int irq = vms->irqmap[VIRT_ACPI_GED]; +- uint32_t event = 
0; ++ uint32_t event = ACPI_GED_PWR_DOWN_EVT; + + if (ms->ram_slots) { +- event = ACPI_GED_MEM_HOTPLUG_EVT; ++ event |= ACPI_GED_MEM_HOTPLUG_EVT; + } + + dev = qdev_create(NULL, TYPE_ACPI_GED); +@@ -906,8 +906,14 @@ static void create_rtc(const VirtMachineState *vms, qemu_irq *pic) + static DeviceState *gpio_key_dev; + static void virt_powerdown_req(Notifier *n, void *opaque) + { +- /* use gpio Pin 3 for power button event */ +- qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1); ++ VirtMachineState *s = container_of(n, VirtMachineState, powerdown_notifier); ++ ++ if (s->acpi_dev) { ++ acpi_send_event(s->acpi_dev, ACPI_POWER_DOWN_STATUS); ++ } else { ++ /* use gpio Pin 3 for power button event */ ++ qemu_set_irq(qdev_get_gpio_in(gpio_key_dev, 0), 1); ++ } + } + + static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) +@@ -1842,10 +1848,10 @@ static void machvirt_init(MachineState *machine) + + create_pcie(vms, pic); + +- create_gpio(vms, pic); +- + if (has_ged && aarch64 && firmware_loaded && acpi_enabled) { + vms->acpi_dev = create_acpi_ged(vms, pic); ++ } else { ++ create_gpio(vms, pic); + } + + /* connect powerdown request */ +diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h +index 43ff119179..adcb3a816c 100644 +--- a/include/hw/acpi/acpi_dev_interface.h ++++ b/include/hw/acpi/acpi_dev_interface.h +@@ -11,6 +11,7 @@ typedef enum { + ACPI_MEMORY_HOTPLUG_STATUS = 8, + ACPI_NVDIMM_HOTPLUG_STATUS = 16, + ACPI_VMGENID_CHANGE_STATUS = 32, ++ ACPI_POWER_DOWN_STATUS = 64, + } AcpiEventStatusBits; + + #define TYPE_ACPI_DEVICE_IF "acpi-device-interface" +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index 2049e8d873..d157eac088 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -62,6 +62,8 @@ + #include "hw/sysbus.h" + #include "hw/acpi/memory_hotplug.h" + ++#define ACPI_POWER_BUTTON_DEVICE "PWRB" ++ + #define 
TYPE_ACPI_GED "acpi-ged" + #define ACPI_GED(obj) \ + OBJECT_CHECK(AcpiGedState, (obj), TYPE_ACPI_GED) +@@ -79,6 +81,7 @@ + * through GED. + */ + #define ACPI_GED_MEM_HOTPLUG_EVT 0x1 ++#define ACPI_GED_PWR_DOWN_EVT 0x2 + + typedef struct GEDState { + MemoryRegion io; +-- +2.19.1 diff --git a/hw-arm-acpi-enable-SHPC-native-hot-plug.patch b/hw-arm-acpi-enable-SHPC-native-hot-plug.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b2e530bb8e3555b4f9cf2a807b060ac62ccd9de --- /dev/null +++ b/hw-arm-acpi-enable-SHPC-native-hot-plug.patch @@ -0,0 +1,45 @@ +From 1ad2e774f4fd3f720d5db07e86fe60df13f21a6d Mon Sep 17 00:00:00 2001 +From: Heyi Guo +Date: Mon, 9 Dec 2019 14:37:19 +0800 +Subject: [PATCH] hw/arm/acpi: enable SHPC native hot plug + +After the introduction of generic PCIe root port and PCIe-PCI bridge, +we will also have SHPC controller on ARM, so just enable SHPC native +hot plug. + +Also update tests/data/acpi/virt/DSDT* to pass "make check". + +Cc: Shannon Zhao +Cc: Peter Maydell +Cc: "Michael S. Tsirkin" +Cc: Igor Mammedov +Reviewed-by: Michael S. Tsirkin +Reviewed-by: Igor Mammedov +Signed-off-by: Heyi Guo +Message-id: 20191209063719.23086-3-guoheyi@huawei.com +Signed-off-by: Peter Maydell +--- + hw/arm/virt-acpi-build.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 2cfac7b84f..588e7f2680 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -347,7 +347,12 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap, + aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3")); + aml_append(ifctx, aml_store(aml_name("CDW2"), aml_name("SUPP"))); + aml_append(ifctx, aml_store(aml_name("CDW3"), aml_name("CTRL"))); +- aml_append(ifctx, aml_store(aml_and(aml_name("CTRL"), aml_int(0x1D), NULL), ++ ++ /* ++ * Allow OS control for all 5 features: ++ * PCIeHotplug SHPCHotplug PME AER PCIeCapability. 
++ */ ++ aml_append(ifctx, aml_store(aml_and(aml_name("CTRL"), aml_int(0x1F), NULL), + aml_name("CTRL"))); + + ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1)))); +-- +2.23.0 + diff --git a/hw-arm-boot-Add-manually-register-and-trigger-of-CPU.patch b/hw-arm-boot-Add-manually-register-and-trigger-of-CPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..95abd07e5a2b62acd14c1a45e61ce471707f173e --- /dev/null +++ b/hw-arm-boot-Add-manually-register-and-trigger-of-CPU.patch @@ -0,0 +1,115 @@ +From de86ba0ff72a51b0c1cdbebf790869aea73ae9d3 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Thu, 9 Apr 2020 09:31:22 +0800 +Subject: [PATCH] hw/arm/boot: Add manually register and trigger of CPU reset + +We need to register and trigger CPU reset manually for hotplugged +CPU. Besides, we gather CPU reset handlers of all CPUs because CPU +reset should happen before GIC reset. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/boot.c | 18 ++++++++++++++++++ + hw/core/reset.c | 25 +++++++++++++++++++++++++ + include/hw/arm/boot.h | 3 +++ + include/sysemu/reset.h | 4 ++++ + 4 files changed, 50 insertions(+) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index fc4e021a38..3ab9de6456 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -789,6 +789,24 @@ static void do_cpu_reset(void *opaque) + } + } + ++void cpu_hotplug_register_reset(int ncpu) ++{ ++ CPUState *cpu_0 = qemu_get_cpu(0); ++ CPUState *cpu = qemu_get_cpu(ncpu); ++ QEMUResetEntry *entry = qemu_get_reset_entry(do_cpu_reset, cpu_0); ++ ++ assert(entry); ++ /* Gather the reset handlers of all CPUs */ ++ qemu_register_reset_after(entry, do_cpu_reset, cpu); ++} ++ ++void cpu_hotplug_reset_manually(int ncpu) ++{ ++ CPUState *cpu = qemu_get_cpu(ncpu); ++ ++ do_cpu_reset(cpu); ++} ++ + /** + * load_image_to_fw_cfg() - Load an image file into an fw_cfg entry identified + * by key. 
+diff --git a/hw/core/reset.c b/hw/core/reset.c +index 9c477f2bf5..0efaf2d76c 100644 +--- a/hw/core/reset.c ++++ b/hw/core/reset.c +@@ -47,6 +47,31 @@ void qemu_register_reset(QEMUResetHandler *func, void *opaque) + QTAILQ_INSERT_TAIL(&reset_handlers, re, entry); + } + ++QEMUResetEntry *qemu_get_reset_entry(QEMUResetHandler *func, ++ void *opaque) ++{ ++ QEMUResetEntry *re; ++ ++ QTAILQ_FOREACH(re, &reset_handlers, entry) { ++ if (re->func == func && re->opaque == opaque) { ++ return re; ++ } ++ } ++ ++ return NULL; ++} ++ ++void qemu_register_reset_after(QEMUResetEntry *entry, ++ QEMUResetHandler *func, ++ void *opaque) ++{ ++ QEMUResetEntry *re = g_malloc0(sizeof(QEMUResetEntry)); ++ ++ re->func = func; ++ re->opaque = opaque; ++ QTAILQ_INSERT_AFTER(&reset_handlers, entry, re, entry); ++} ++ + void qemu_unregister_reset(QEMUResetHandler *func, void *opaque) + { + QEMUResetEntry *re; +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index c48cc4c2bc..9452ccd1fa 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -118,6 +118,9 @@ struct arm_boot_info { + arm_endianness endianness; + }; + ++void cpu_hotplug_register_reset(int ncpu); ++void cpu_hotplug_reset_manually(int ncpu); ++ + /** + * arm_load_kernel - Loads memory with everything needed to boot + * +diff --git a/include/sysemu/reset.h b/include/sysemu/reset.h +index 0b0d6d7598..f3ff26c637 100644 +--- a/include/sysemu/reset.h ++++ b/include/sysemu/reset.h +@@ -2,7 +2,11 @@ + #define QEMU_SYSEMU_RESET_H + + typedef void QEMUResetHandler(void *opaque); ++typedef struct QEMUResetEntry QEMUResetEntry; + ++QEMUResetEntry *qemu_get_reset_entry(QEMUResetHandler *func, void *opaque); ++void qemu_register_reset_after(QEMUResetEntry *entry, ++ QEMUResetHandler *func, void *opaque); + void qemu_register_reset(QEMUResetHandler *func, void *opaque); + void qemu_unregister_reset(QEMUResetHandler *func, void *opaque); + void qemu_devices_reset(void); +-- +2.19.1 diff --git 
a/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch b/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch new file mode 100644 index 0000000000000000000000000000000000000000..e7ca51a5deb7146de6d6b05aa1d8391b0c08adac --- /dev/null +++ b/hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch @@ -0,0 +1,47 @@ +From 220816989c1e3d490d293b8d7ac85dbc41a4c321 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 20 Sep 2019 18:40:39 +0100 +Subject: [PATCH] hw/arm/boot.c: Set NSACR.{CP11,CP10} for NS kernel boots + +If we're booting a Linux kernel directly into Non-Secure +state on a CPU which has Secure state, then make sure we +set the NSACR CP11 and CP10 bits, so that Non-Secure is allowed +to access the FPU. Otherwise an AArch32 kernel will UNDEF as +soon as it tries to use the FPU. + +It used to not matter that we didn't do this until commit +fc1120a7f5f2d4b6, where we implemented actually honouring +these NSACR bits. + +The problem only exists for CPUs where EL3 is AArch32; the +equivalent AArch64 trap bits are in CPTR_EL3 and are "0 to +not trap, 1 to trap", so the reset value of the register +permits NS access, unlike NSACR. 
+ +Fixes: fc1120a7f5 +Fixes: https://bugs.launchpad.net/qemu/+bug/1844597 +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20190920174039.3916-1-peter.maydell@linaro.org +(cherry picked from commit ece628fcf69cbbd4b3efb6fbd203af07609467a2) +Signed-off-by: Michael Roth +--- + hw/arm/boot.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index c2b89b3bb9..fc4e021a38 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -754,6 +754,8 @@ static void do_cpu_reset(void *opaque) + (cs != first_cpu || !info->secure_board_setup)) { + /* Linux expects non-secure state */ + env->cp15.scr_el3 |= SCR_NS; ++ /* Set NSACR.{CP11,CP10} so NS can access the FPU */ ++ env->cp15.nsacr |= 3 << 10; + } + } + +-- +2.23.0 diff --git a/hw-arm-expose-host-CPU-frequency-info-to-guest.patch b/hw-arm-expose-host-CPU-frequency-info-to-guest.patch index 0b04076e5478d671dc55937313d677e603d23720..f0093812ed61e769afec350993e0298a4c5f9e10 100644 --- a/hw-arm-expose-host-CPU-frequency-info-to-guest.patch +++ b/hw-arm-expose-host-CPU-frequency-info-to-guest.patch @@ -1,54 +1,57 @@ -From 773b25c55c7428b64d21b23a6b08fc629a665ca5 Mon Sep 17 00:00:00 2001 -From: zhanghailiang -Date: Mon, 29 Jul 2019 09:54:43 +0800 -Subject: [PATCH] hw/arm: expose host CPU frequency info to guest +From b70d020dba72283d7b16a77c377512c84aab5f81 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 20 Apr 2020 10:38:12 +0800 +Subject: [PATCH] arm64: Add the cpufreq device to show cpufreq info to guest -On ARM64, CPU frequency is fetched by ACPI CPPC, so we add virtual -CPPC registers and ACPI _CPC objects. +On ARM64 platform, cpu frequency is retrieved via ACPI CPPC. +A virtual cpufreq device based on ACPI CPPC is created to +present cpu frequency info to the guest. -The default frequency is set to the nominal frequency of Hi1616, which -will not support CPPC in future. On Hi1620 we are fetching the value -from Host CPPC sys file. 
+The default frequency is set to host cpu nominal frequency, +which is obtained from the host CPPC sysfs. Other performance +data are set to the same value, since we don't support guest +performance scaling here. -All performance data are set to the same value for we don't support -guest initiating performance scaling. - -We don't emulate performance counters and simply return 1 for all -counter readings, and guest Linux should fall back to use the desired +Performance counters are also not emulated and they simply +return 1 if read, and guest should fallback to use desired performance value as the current performance. -Signed-off-by: Heyi Guo -Signed-off-by: zhanghailiang +Guest kernel version above 4.18 is required to make it work. + +This series is backported from: +https://patchwork.kernel.org/cover/11379943/ + +Signed-off-by: Ying Fang --- default-configs/aarch64-softmmu.mak | 1 + hw/acpi/Makefile.objs | 1 + hw/acpi/aml-build.c | 22 +++ - hw/acpi/cpufreq.c | 278 ++++++++++++++++++++++++++++ + hw/acpi/cpufreq.c | 287 ++++++++++++++++++++++++++++ hw/arm/virt-acpi-build.c | 78 +++++++- hw/arm/virt.c | 13 ++ hw/char/Kconfig | 4 + include/hw/acpi/acpi-defs.h | 38 ++++ include/hw/acpi/aml-build.h | 3 + include/hw/arm/virt.h | 1 + - 10 files changed, 437 insertions(+), 2 deletions(-) + 10 files changed, 446 insertions(+), 2 deletions(-) create mode 100644 hw/acpi/cpufreq.c diff --git a/default-configs/aarch64-softmmu.mak b/default-configs/aarch64-softmmu.mak -index 4ea9add0..37399c14 100644 +index 958b1e08..0a030e85 100644 --- a/default-configs/aarch64-softmmu.mak +++ b/default-configs/aarch64-softmmu.mak -@@ -10,3 +10,4 @@ CONFIG_XLNX_ZYNQMP=y +@@ -6,3 +6,4 @@ include arm-softmmu.mak CONFIG_XLNX_ZYNQMP_ARM=y CONFIG_XLNX_VERSAL=y - CONFIG_ARM_SMMUV3=y + CONFIG_SBSA_REF=y +CONFIG_CPUFREQ=y diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs -index 2d46e378..60979db9 100644 +index 9bb2101e..1a720c38 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs 
-@@ -12,6 +12,7 @@ common-obj-y += acpi_interface.o - common-obj-y += bios-linker-loader.o - common-obj-y += aml-build.o +@@ -13,6 +13,7 @@ common-obj-y += bios-linker-loader.o + common-obj-y += aml-build.o utils.o + common-obj-$(CONFIG_ACPI_PCI) += pci.o common-obj-$(CONFIG_TPM) += tpm.o +common-obj-$(CONFIG_CPUFREQ) += cpufreq.o @@ -89,10 +92,10 @@ index 555c24f2..73f97751 100644 int hi, lo; diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c new file mode 100644 -index 00000000..c123a22b +index 00000000..d02a25a6 --- /dev/null +++ b/hw/acpi/cpufreq.c -@@ -0,0 +1,278 @@ +@@ -0,0 +1,287 @@ +/* + * ACPI CPPC register device + * @@ -124,6 +127,7 @@ index 00000000..c123a22b +#include "hw/acpi/acpi-defs.h" +#include "qemu/cutils.h" +#include "qemu/error-report.h" ++#include "hw/boards.h" + +#define TYPE_CPUFREQ "cpufreq" +#define CPUFREQ(obj) OBJECT_CHECK(CpuhzState, (obj), TYPE_CPUFREQ) @@ -189,6 +193,9 @@ index 00000000..c123a22b + uint64_t r; + uint64_t n; + ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; ++ + if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { + warn_report("cpufreq_read: offset 0x%lx out of range", offset); + return 0; @@ -258,6 +265,8 @@ index 00000000..c123a22b + uint64_t value, unsigned size) +{ + uint64_t n; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; + + if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { + error_printf("cpufreq_write: offset 0x%lx out of range", offset); @@ -339,6 +348,9 @@ index 00000000..c123a22b + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + CpuhzState *s = CPUFREQ(obj); + ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; ++ + s->reg_size = smp_cpus * CPPC_REG_PER_CPU_STRIDE; + if (s->reg_size > MAX_SUPPORT_SPACE) { + error_report("Required space 0x%x excesses the max support 0x%x", @@ -372,7 +384,7 @@ index 00000000..c123a22b +type_init(cpufreq_register_types) + diff --git 
a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index bf9c0bc2..33a8e2e3 100644 +index 0afb3727..29494ebd 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -45,11 +45,73 @@ @@ -469,7 +481,7 @@ index bf9c0bc2..33a8e2e3 100644 aml_append(scope, dev); } } -@@ -740,7 +814,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) +@@ -718,7 +792,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) * the RTC ACPI device at all when using UEFI. */ scope = aml_scope("\\_SB"); @@ -479,10 +491,10 @@ index bf9c0bc2..33a8e2e3 100644 (irqmap[VIRT_UART] + ARM_SPI_BASE)); acpi_dsdt_add_flash(scope, &memmap[VIRT_FLASH]); diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ce2664a3..ec6f00ab 100644 +index d9496c93..0fa355ba 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -132,6 +132,7 @@ static const MemMapEntry base_memmap[] = { +@@ -135,6 +135,7 @@ static const MemMapEntry base_memmap[] = { [VIRT_SECURE_UART] = { 0x09040000, 0x00001000 }, [VIRT_SMMU] = { 0x09050000, 0x00020000 }, [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, @@ -490,7 +502,7 @@ index ce2664a3..ec6f00ab 100644 /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 }, -@@ -725,6 +726,16 @@ static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart, +@@ -731,6 +732,16 @@ static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart, g_free(nodename); } @@ -507,7 +519,7 @@ index ce2664a3..ec6f00ab 100644 static void create_rtc(const VirtMachineState *vms, qemu_irq *pic) { char *nodename; -@@ -1618,6 +1629,8 @@ static void machvirt_init(MachineState *machine) +@@ -1682,6 +1693,8 @@ static void machvirt_init(MachineState *machine) create_uart(vms, pic, VIRT_UART, sysmem, serial_hd(0)); @@ -517,10 +529,10 @@ index ce2664a3..ec6f00ab 100644 create_secure_ram(vms, secure_sysmem); create_uart(vms, pic, 
VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); diff --git a/hw/char/Kconfig b/hw/char/Kconfig -index 6360c9ff..8cc3ae2a 100644 +index 40e7a8b8..2f61bf53 100644 --- a/hw/char/Kconfig +++ b/hw/char/Kconfig -@@ -40,3 +40,7 @@ config SCLPCONSOLE +@@ -46,3 +46,7 @@ config SCLPCONSOLE config TERMINAL3270 bool @@ -529,10 +541,10 @@ index 6360c9ff..8cc3ae2a 100644 + bool + default y diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h -index f9aa4bd3..b4899a32 100644 +index 57a3f58b..39ae91d3 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h -@@ -652,4 +652,42 @@ struct AcpiIortRC { +@@ -634,4 +634,42 @@ struct AcpiIortRC { } QEMU_PACKED; typedef struct AcpiIortRC AcpiIortRC; @@ -590,7 +602,7 @@ index 1a563ad7..375335ab 100644 /* Block AML object primitives */ Aml *aml_scope(const char *name_format, ...) GCC_FMT_ATTR(1, 2); diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index 507517c6..8465f9bd 100644 +index a7209420..43a6ce91 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -66,6 +66,7 @@ enum { @@ -602,5 +614,5 @@ index 507517c6..8465f9bd 100644 VIRT_RTC, VIRT_FW_CFG, -- -2.19.1 +2.23.0 diff --git a/hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch b/hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch new file mode 100644 index 0000000000000000000000000000000000000000..1824b8e4fd0fa31603df903bb640065c70858907 --- /dev/null +++ b/hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch @@ -0,0 +1,33 @@ +From 8bf9d1dc67335c1fb921a56825f6bf198a568091 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 19 Mar 2021 12:22:48 -0400 +Subject: [PATCH] hw/arm/smmu-common: Allow domain invalidation for + NH_ALL/NSNH_ALL + +NH_ALL/NSNH_ALL corresponds to a domain granularity invalidation, +ie. all the notifier range gets invalidation, whatever the ASID. +So let's set the granularity to IOMMU_INV_GRAN_DOMAIN to allow +the consumer to benefit from the info if it can. 
+ +Signed-off-by: Eric Auger +Suggested-by: chenxiang (M) +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmu-common.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 717d22bcbe..de9468d33f 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -395,6 +395,7 @@ static void smmu_unmap_notifier_range(IOMMUNotifier *n) + entry.iova = n->start; + entry.perm = IOMMU_NONE; + entry.addr_mask = n->end - n->start; ++ entry.granularity = IOMMU_INV_GRAN_DOMAIN; + + memory_region_notify_one(n, &entry); + } +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch b/hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch new file mode 100644 index 0000000000000000000000000000000000000000..89f9292287246e65a25587df2da43f2765457312 --- /dev/null +++ b/hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch @@ -0,0 +1,32 @@ +From bc602a4d1355774a0a44e8fbf6dd842049dd63f3 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 28 Aug 2018 09:21:53 -0400 +Subject: [PATCH] hw/arm/smmuv3: Advertise MSI_TRANSLATE attribute + +The SMMUv3 has the peculiarity to translate MSI +transactions. Let's advertise the corresponding +attribute. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 55eed5189e..83d59b6d28 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1538,6 +1538,9 @@ static int smmuv3_get_attr(IOMMUMemoryRegion *iommu, + if (attr == IOMMU_ATTR_VFIO_NESTED) { + *(bool *) data = true; + return 0; ++ } else if (attr == IOMMU_ATTR_MSI_TRANSLATE) { ++ *(bool *) data = true; ++ return 0; + } + return -EINVAL; + } +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Allow-MAP-notifiers.patch b/hw-arm-smmuv3-Allow-MAP-notifiers.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec050121fcd57a2e942774ce76fceb8ed5039cf2 --- /dev/null +++ b/hw-arm-smmuv3-Allow-MAP-notifiers.patch @@ -0,0 +1,37 @@ +From 965729b4875f637dacdbf82960347beb65512d12 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 18 Mar 2020 11:17:36 +0100 +Subject: [PATCH] hw/arm/smmuv3: Allow MAP notifiers + +We now have all bricks to support nested paging. This +uses MAP notifiers to map the MSIs. So let's allow MAP +notifiers to be registered. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 8 -------- + 1 file changed, 8 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 931d6eae57..c26fba118c 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1563,14 +1563,6 @@ static void smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, + SMMUv3State *s3 = sdev->smmu; + SMMUState *s = &(s3->smmu_state); + +- if (new & IOMMU_NOTIFIER_MAP) { +- int bus_num = pci_bus_num(sdev->bus); +- PCIDevice *pcidev = pci_find_device(sdev->bus, bus_num, sdev->devfn); +- +- warn_report("SMMUv3 does not support notification on MAP: " +- "device %s will not function properly", pcidev->name); +- } +- + if (old == IOMMU_NOTIFIER_NONE) { + trace_smmuv3_notify_flag_add(iommu->parent_obj.name); + QLIST_INSERT_HEAD(&s->devices_with_notifiers, sdev, next); +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch b/hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..1f3425e7eddae1fee87d0cb8d86587f4e6011ee5 --- /dev/null +++ b/hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch @@ -0,0 +1,34 @@ +From 8108317641b3cb378bf1862dc3c0a73d1e0976ce Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 4 Sep 2018 08:48:33 -0400 +Subject: [PATCH] hw/arm/smmuv3: Fill the IOTLBEntry arch_id on NH_VA + invalidation + +When the guest invalidates one S1 entry, it passes the asid. +When propagating this invalidation down to the host, the asid +information also must be passed. So let's fill the arch_id field +introduced for that purpose and accordingly set the flags to +indicate its presence. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index f8e721f949..c6b950af35 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -824,6 +824,8 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + entry.iova = iova; + entry.addr_mask = (1 << tt->granule_sz) - 1; + entry.perm = IOMMU_NONE; ++ entry.flags = IOMMU_INV_FLAGS_ARCHID; ++ entry.arch_id = asid; + + memory_region_notify_one(n, &entry); + } +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch b/hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch new file mode 100644 index 0000000000000000000000000000000000000000..febaffaa655ecbe70419d692e586e56b1561f330 --- /dev/null +++ b/hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch @@ -0,0 +1,81 @@ +From 6393ad5c1ba6a04b038d80ecc1e663ad91ed0d21 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 14 Mar 2019 09:55:13 -0400 +Subject: [PATCH] hw/arm/smmuv3: Fill the IOTLBEntry leaf field on NH_VA + invalidation + +Let's propagate the leaf attribute throughout the invalidation path. +This hint is used to reduce the scope of the invalidations to the +last level of translation. Not enforcing it induces large performance +penalties in nested mode. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index c6b950af35..c1caa6bc3a 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -795,7 +795,7 @@ epilogue: + static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + IOMMUNotifier *n, + int asid, +- dma_addr_t iova) ++ dma_addr_t iova, bool leaf) + { + SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); + SMMUEventInfo event = {}; +@@ -826,6 +826,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + entry.perm = IOMMU_NONE; + entry.flags = IOMMU_INV_FLAGS_ARCHID; + entry.arch_id = asid; ++ entry.leaf = leaf; + + memory_region_notify_one(n, &entry); + } +@@ -854,7 +855,8 @@ static void smmuv3_notify_asid(IOMMUMemoryRegion *mr, + } + + /* invalidate an asid/iova tuple in all mr's */ +-static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) ++static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, ++ bool leaf) + { + SMMUDevice *sdev; + +@@ -865,7 +867,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) + trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, iova); + + IOMMU_NOTIFIER_FOREACH(n, mr) { +- smmuv3_notify_iova(mr, n, asid, iova); ++ smmuv3_notify_iova(mr, n, asid, iova, leaf); + } + } + } +@@ -1018,9 +1020,10 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + { + dma_addr_t addr = CMD_ADDR(&cmd); + uint16_t vmid = CMD_VMID(&cmd); ++ bool leaf = CMD_LEAF(&cmd); + + trace_smmuv3_cmdq_tlbi_nh_vaa(vmid, addr); +- smmuv3_inv_notifiers_iova(bs, -1, addr); ++ smmuv3_inv_notifiers_iova(bs, -1, addr, leaf); + smmu_iotlb_inv_all(bs); + break; + } +@@ -1032,7 +1035,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + bool leaf = CMD_LEAF(&cmd); + + trace_smmuv3_cmdq_tlbi_nh_va(vmid, asid, addr, leaf); +- smmuv3_inv_notifiers_iova(bs, asid, addr); ++ 
smmuv3_inv_notifiers_iova(bs, asid, addr, leaf); + smmu_iotlb_inv_iova(bs, asid, addr); + break; + } +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Implement-fault-injection.patch b/hw-arm-smmuv3-Implement-fault-injection.patch new file mode 100644 index 0000000000000000000000000000000000000000..0260e28a05e7d30ec2b637eadb2251890c7e3701 --- /dev/null +++ b/hw-arm-smmuv3-Implement-fault-injection.patch @@ -0,0 +1,107 @@ +From 55bfd18b7671c82705d83d543281add0afcda31f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 13 Sep 2018 14:24:45 +0200 +Subject: [PATCH] hw/arm/smmuv3: Implement fault injection + +We convert iommu_fault structs received from the kernel +into the data struct used by the emulation code and record +the evnts into the virtual event queue. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 71 insertions(+) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 3d2151857d..931d6eae57 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1594,6 +1594,76 @@ static int smmuv3_get_attr(IOMMUMemoryRegion *iommu, + return -EINVAL; + } + ++struct iommu_fault; ++ ++static inline int ++smmuv3_inject_faults(IOMMUMemoryRegion *iommu_mr, int count, ++ struct iommu_fault *buf) ++{ ++#ifdef __linux__ ++ SMMUDevice *sdev = container_of(iommu_mr, SMMUDevice, iommu); ++ SMMUv3State *s3 = sdev->smmu; ++ uint32_t sid = smmu_get_sid(sdev); ++ int i; ++ ++ for (i = 0; i < count; i++) { ++ SMMUEventInfo info = {}; ++ struct iommu_fault_unrecoverable *record; ++ ++ if (buf[i].type != IOMMU_FAULT_DMA_UNRECOV) { ++ continue; ++ } ++ ++ info.sid = sid; ++ record = &buf[i].event; ++ ++ switch (record->reason) { ++ case IOMMU_FAULT_REASON_PASID_INVALID: ++ info.type = SMMU_EVT_C_BAD_SUBSTREAMID; ++ /* TODO further fill info.u.c_bad_substream */ ++ break; ++ case IOMMU_FAULT_REASON_PASID_FETCH: ++ info.type = SMMU_EVT_F_CD_FETCH; ++ break; ++ case 
IOMMU_FAULT_REASON_BAD_PASID_ENTRY: ++ info.type = SMMU_EVT_C_BAD_CD; ++ /* TODO further fill info.u.c_bad_cd */ ++ break; ++ case IOMMU_FAULT_REASON_WALK_EABT: ++ info.type = SMMU_EVT_F_WALK_EABT; ++ info.u.f_walk_eabt.addr = record->addr; ++ info.u.f_walk_eabt.addr2 = record->fetch_addr; ++ break; ++ case IOMMU_FAULT_REASON_PTE_FETCH: ++ info.type = SMMU_EVT_F_TRANSLATION; ++ info.u.f_translation.addr = record->addr; ++ break; ++ case IOMMU_FAULT_REASON_OOR_ADDRESS: ++ info.type = SMMU_EVT_F_ADDR_SIZE; ++ info.u.f_addr_size.addr = record->addr; ++ break; ++ case IOMMU_FAULT_REASON_ACCESS: ++ info.type = SMMU_EVT_F_ACCESS; ++ info.u.f_access.addr = record->addr; ++ break; ++ case IOMMU_FAULT_REASON_PERMISSION: ++ info.type = SMMU_EVT_F_PERMISSION; ++ info.u.f_permission.addr = record->addr; ++ break; ++ default: ++ warn_report("%s Unexpected fault reason received from host: %d", ++ __func__, record->reason); ++ continue; ++ } ++ ++ smmuv3_record_event(s3, &info); ++ } ++ return 0; ++#else ++ return -1; ++#endif ++} ++ + static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, + void *data) + { +@@ -1602,6 +1672,7 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, + imrc->translate = smmuv3_translate; + imrc->notify_flag_changed = smmuv3_notify_flag_changed; + imrc->get_attr = smmuv3_get_attr; ++ imrc->inject_faults = smmuv3_inject_faults; + } + + static const TypeInfo smmuv3_type_info = { +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch b/hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch new file mode 100644 index 0000000000000000000000000000000000000000..10639e89f957b970b78f2c0de930ad8b92032d0f --- /dev/null +++ b/hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch @@ -0,0 +1,105 @@ +From c0027c2e744c8ed99e937d3cbc88f400ab63a316 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Sun, 14 Feb 2021 12:30:57 -0500 +Subject: [PATCH] hw/arm/smmuv3: Improve stage1 ASID invalidation + +At the moment 
ASID invalidation command (CMD_TLBI_NH_ASID) is +propagated as a domain invalidation (the whole notifier range +is invalidated independently on any ASID information). + +The new granularity field now allows to be more precise and +restrict the invalidation to a peculiar ASID. Set the corresponding +fields and flag. + +We still keep the iova and addr_mask settings for consumers that +do not support the new fields, like VHOST. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 42 ++++++++++++++++++++++++++++++++++++++++-- + hw/arm/trace-events | 1 + + 2 files changed, 41 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 3b5723e1e1..0ef1ca376c 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -827,6 +827,29 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, + memory_region_notify_one(n, &entry); + } + ++/** ++ * smmuv3_notify_asid - call the notifier @n for a given asid ++ * ++ * @mr: IOMMU mr region handle ++ * @n: notifier to be called ++ * @asid: address space ID or negative value if we don't care ++ */ ++static void smmuv3_notify_asid(IOMMUMemoryRegion *mr, ++ IOMMUNotifier *n, int asid) ++{ ++ IOMMUTLBEntry entry; ++ ++ entry.target_as = &address_space_memory; ++ entry.perm = IOMMU_NONE; ++ entry.granularity = IOMMU_INV_GRAN_PASID; ++ entry.flags = IOMMU_INV_FLAGS_ARCHID; ++ entry.arch_id = asid; ++ entry.iova = n->start; ++ entry.addr_mask = n->end - n->start; ++ ++ memory_region_notify_one(n, &entry); ++} ++ + /* invalidate an asid/iova tuple in all mr's */ + static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) + { +@@ -844,6 +867,22 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova) + } + } + ++static void smmuv3_s1_asid_inval(SMMUState *s, uint16_t asid) ++{ ++ SMMUDevice *sdev; ++ ++ trace_smmuv3_s1_asid_inval(asid); ++ QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) { ++ IOMMUMemoryRegion *mr = &sdev->iommu; ++ 
IOMMUNotifier *n; ++ ++ IOMMU_NOTIFIER_FOREACH(n, mr) { ++ smmuv3_notify_asid(mr, n, asid); ++ } ++ } ++ smmu_iotlb_inv_asid(s, asid); ++} ++ + static int smmuv3_cmdq_consume(SMMUv3State *s) + { + SMMUState *bs = ARM_SMMU(s); +@@ -963,8 +1002,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + uint16_t asid = CMD_ASID(&cmd); + + trace_smmuv3_cmdq_tlbi_nh_asid(asid); +- smmu_inv_notifiers_all(&s->smmu_state); +- smmu_iotlb_inv_asid(bs, asid); ++ smmuv3_s1_asid_inval(bs, asid); + break; + } + case SMMU_CMD_TLBI_NH_ALL: +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index 0acedcedc6..4512d20115 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -44,6 +44,7 @@ smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t p + smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid %d (hits=%d, misses=%d, hit rate=%d)" + smmuv3_cmdq_tlbi_nh_va(int vmid, int asid, uint64_t addr, bool leaf) "vmid =%d asid =%d addr=0x%"PRIx64" leaf=%d" + smmuv3_cmdq_tlbi_nh_vaa(int vmid, uint64_t addr) "vmid =%d addr=0x%"PRIx64 ++smmuv3_s1_asid_inval(int asid) "asid=%d" + smmuv3_cmdq_tlbi_nh(void) "" + smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" + smmu_iotlb_cache_hit(uint16_t asid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch b/hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch new file mode 100644 index 0000000000000000000000000000000000000000..a615b8664bd6b9c3603073bfd7ec0bb505e70ef8 --- /dev/null +++ b/hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch @@ -0,0 +1,147 @@ +From d0a1ce3c46246b6ef5510ac1d5c18308417ed525 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 9 Aug 2018 21:04:19 +0200 +Subject: [PATCH] hw/arm/smmuv3: Pass stage 1 configurations to the host + +In case PASID PciOps are set for 
the device we call +the set_pasid_table() callback on each STE update. + +This allows to pass the guest stage 1 configuration +to the host and apply it at physical level. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 77 +++++++++++++++++++++++++++++++++++---------- + hw/arm/trace-events | 2 +- + 2 files changed, 61 insertions(+), 18 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index c1caa6bc3a..3d2151857d 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -16,6 +16,10 @@ + * with this program; if not, see . + */ + ++#ifdef __linux__ ++#include "linux/iommu.h" ++#endif ++ + #include "qemu/osdep.h" + #include "hw/boards.h" + #include "sysemu/sysemu.h" +@@ -872,6 +876,60 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, + } + } + ++static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) ++{ ++#ifdef __linux__ ++ IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid); ++ SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid}; ++ IOMMUConfig iommu_config = {}; ++ SMMUTransCfg *cfg; ++ SMMUDevice *sdev; ++ ++ if (!mr) { ++ return; ++ } ++ ++ sdev = container_of(mr, SMMUDevice, iommu); ++ ++ /* flush QEMU config cache */ ++ smmuv3_flush_config(sdev); ++ ++ if (!pci_device_is_pasid_ops_set(sdev->bus, sdev->devfn)) { ++ return; ++ } ++ ++ cfg = smmuv3_get_config(sdev, &event); ++ ++ if (!cfg) { ++ return; ++ } ++ ++ iommu_config.pasid_cfg.argsz = sizeof(struct iommu_pasid_table_config); ++ iommu_config.pasid_cfg.version = PASID_TABLE_CFG_VERSION_1; ++ iommu_config.pasid_cfg.format = IOMMU_PASID_FORMAT_SMMUV3; ++ iommu_config.pasid_cfg.base_ptr = cfg->s1ctxptr; ++ iommu_config.pasid_cfg.pasid_bits = 0; ++ iommu_config.pasid_cfg.vendor_data.smmuv3.version = PASID_TABLE_SMMUV3_CFG_VERSION_1; ++ ++ if (cfg->disabled || cfg->bypassed) { ++ iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_BYPASS; ++ } else if (cfg->aborted) { ++ iommu_config.pasid_cfg.config = 
IOMMU_PASID_CONFIG_ABORT; ++ } else { ++ iommu_config.pasid_cfg.config = IOMMU_PASID_CONFIG_TRANSLATE; ++ } ++ ++ trace_smmuv3_notify_config_change(mr->parent_obj.name, ++ iommu_config.pasid_cfg.config, ++ iommu_config.pasid_cfg.base_ptr); ++ ++ if (pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config)) { ++ error_report("Failed to pass PASID table to host for iommu mr %s (%m)", ++ mr->parent_obj.name); ++ } ++#endif ++} ++ + static void smmuv3_s1_asid_inval(SMMUState *s, uint16_t asid) + { + SMMUDevice *sdev; +@@ -938,22 +996,14 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + case SMMU_CMD_CFGI_STE: + { + uint32_t sid = CMD_SID(&cmd); +- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid); +- SMMUDevice *sdev; + + if (CMD_SSEC(&cmd)) { + cmd_error = SMMU_CERROR_ILL; + break; + } + +- if (!mr) { +- break; +- } +- + trace_smmuv3_cmdq_cfgi_ste(sid); +- sdev = container_of(mr, SMMUDevice, iommu); +- smmuv3_flush_config(sdev); +- ++ smmuv3_notify_config_change(bs, sid); + break; + } + case SMMU_CMD_CFGI_STE_RANGE: /* same as SMMU_CMD_CFGI_ALL */ +@@ -970,14 +1020,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) + trace_smmuv3_cmdq_cfgi_ste_range(start, end); + + for (i = start; i <= end; i++) { +- IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, i); +- SMMUDevice *sdev; +- +- if (!mr) { +- continue; +- } +- sdev = container_of(mr, SMMUDevice, iommu); +- smmuv3_flush_config(sdev); ++ smmuv3_notify_config_change(bs, i); + } + break; + } +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index 4512d20115..cbbe2ccafd 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -53,4 +53,4 @@ smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid %d" + smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" + smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" + smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint64_t iova) "iommu mr=%s asid=%d iova=0x%"PRIx64 +- 
++smmuv3_notify_config_change(const char *name, uint8_t config, uint64_t s1ctxptr) "iommu mr=%s config=%d s1ctxptr=0x%"PRIx64 +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch b/hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch new file mode 100644 index 0000000000000000000000000000000000000000..c363acb60c0fce72a986b6056aa74bb578b7a992 --- /dev/null +++ b/hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch @@ -0,0 +1,110 @@ +From 06e43bc658aa80bb5f4da3e43c1c13d4cab6ebdd Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Tue, 11 May 2021 10:08:16 +0800 +Subject: [PATCH] hw/arm/smmuv3: Post-load stage 1 configurations to the host + +In nested mode, we call the set_pasid_table() callback on each +STE update to pass the guest stage 1 configuration to the host +and apply it at physical level. + +In the case of live migration, we need to manually call the +set_pasid_table() to load the guest stage 1 configurations to +the host. If this operation fails, the migration fails. 
+ +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 33 ++++++++++++++++++++++++++++----- + 1 file changed, 28 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index c26fba118c..f383143db1 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -876,7 +876,7 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, dma_addr_t iova, + } + } + +-static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) ++static int smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) + { + #ifdef __linux__ + IOMMUMemoryRegion *mr = smmu_iommu_mr(bs, sid); +@@ -884,9 +884,10 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) + IOMMUConfig iommu_config = {}; + SMMUTransCfg *cfg; + SMMUDevice *sdev; ++ int ret; + + if (!mr) { +- return; ++ return 0; + } + + sdev = container_of(mr, SMMUDevice, iommu); +@@ -895,13 +896,13 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) + smmuv3_flush_config(sdev); + + if (!pci_device_is_pasid_ops_set(sdev->bus, sdev->devfn)) { +- return; ++ return 0; + } + + cfg = smmuv3_get_config(sdev, &event); + + if (!cfg) { +- return; ++ return 0; + } + + iommu_config.pasid_cfg.argsz = sizeof(struct iommu_pasid_table_config); +@@ -923,10 +924,13 @@ static void smmuv3_notify_config_change(SMMUState *bs, uint32_t sid) + iommu_config.pasid_cfg.config, + iommu_config.pasid_cfg.base_ptr); + +- if (pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config)) { ++ ret = pci_device_set_pasid_table(sdev->bus, sdev->devfn, &iommu_config); ++ if (ret) { + error_report("Failed to pass PASID table to host for iommu mr %s (%m)", + mr->parent_obj.name); + } ++ ++ return ret; + #endif + } + +@@ -1494,6 +1498,24 @@ static void smmu_realize(DeviceState *d, Error **errp) + smmu_init_irq(s, dev); + } + ++static int smmuv3_post_load(void *opaque, int version_id) ++{ ++ SMMUv3State *s3 = opaque; ++ SMMUState *s = &(s3->smmu_state); ++ SMMUDevice *sdev; ++ int ret = 0; ++ ++ 
QLIST_FOREACH(sdev, &s->devices_with_notifiers, next) { ++ uint32_t sid = smmu_get_sid(sdev); ++ ret = smmuv3_notify_config_change(s, sid); ++ if (ret) { ++ break; ++ } ++ } ++ ++ return ret; ++} ++ + static const VMStateDescription vmstate_smmuv3_queue = { + .name = "smmuv3_queue", + .version_id = 1, +@@ -1512,6 +1534,7 @@ static const VMStateDescription vmstate_smmuv3 = { + .version_id = 1, + .minimum_version_id = 1, + .priority = MIG_PRI_IOMMU, ++ .post_load = smmuv3_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(features, SMMUv3State), + VMSTATE_UINT8(sid_size, SMMUv3State), +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Set-the-restoration-priority-of-the-vS.patch b/hw-arm-smmuv3-Set-the-restoration-priority-of-the-vS.patch new file mode 100644 index 0000000000000000000000000000000000000000..1139feaed62705a6baebbecba25ad0355b761daf --- /dev/null +++ b/hw-arm-smmuv3-Set-the-restoration-priority-of-the-vS.patch @@ -0,0 +1,33 @@ +From eceb9213e23d15d5b4342b6a6a8368f4fec60c2f Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Mon, 19 Oct 2020 17:15:08 +0800 +Subject: [PATCH] hw/arm/smmuv3: Set the restoration priority of the vSMMUv3 + explicitly + +Ensure the vSMMUv3 will be restored before all PCIe devices so that DMA +translation can work properly during migration. 
+ +Signed-off-by: Zenghui Yu +Message-id: 20201019091508.197-1-yuzenghui@huawei.com +Acked-by: Eric Auger +Signed-off-by: Peter Maydell +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 7911944c59..3b5723e1e1 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1424,6 +1424,7 @@ static const VMStateDescription vmstate_smmuv3 = { + .name = "smmuv3", + .version_id = 1, + .minimum_version_id = 1, ++ .priority = MIG_PRI_IOMMU, + .fields = (VMStateField[]) { + VMSTATE_UINT32(features, SMMUv3State), + VMSTATE_UINT8(sid_size, SMMUv3State), +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch b/hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ed3590b6e3c3863486db0082be983a7b7d4968c --- /dev/null +++ b/hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch @@ -0,0 +1,45 @@ +From 6fc85d8a6022d94ffec4cc118472cde583706bfb Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 9 Aug 2018 20:56:44 +0200 +Subject: [PATCH] hw/arm/smmuv3: Store the PASID table GPA in the translation + config + +For VFIO integration we will need to pass the Context Descriptor (CD) +table GPA to the host. The CD table is also referred to as the PASID +table. Its GPA corresponds to the s1ctxptr field of the Stream Table +Entry. So let's decode and store it in the configuration structure. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 1 + + include/hw/arm/smmu-common.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 83d59b6d28..f8e721f949 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -352,6 +352,7 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, + "SMMUv3 S1 stalling fault model not allowed yet\n"); + goto bad_ste; + } ++ cfg->s1ctxptr = STE_CTXPTR(ste); + return 0; + + bad_ste: +diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h +index 1f37844e5c..353668f4ea 100644 +--- a/include/hw/arm/smmu-common.h ++++ b/include/hw/arm/smmu-common.h +@@ -68,6 +68,7 @@ typedef struct SMMUTransCfg { + uint8_t tbi; /* Top Byte Ignore */ + uint16_t asid; + SMMUTransTableInfo tt[2]; ++ dma_addr_t s1ctxptr; + uint32_t iotlb_hits; /* counts IOTLB hits for this asid */ + uint32_t iotlb_misses; /* counts IOTLB misses for this asid */ + } SMMUTransCfg; +-- +2.27.0 + diff --git a/hw-arm-smmuv3-Support-16K-translation-granule.patch b/hw-arm-smmuv3-Support-16K-translation-granule.patch new file mode 100644 index 0000000000000000000000000000000000000000..08c4bc5603401f6e5735daa6767dfa2aa2785255 --- /dev/null +++ b/hw-arm-smmuv3-Support-16K-translation-granule.patch @@ -0,0 +1,49 @@ +From 008dec30dea19950ff48a34c54441d065c1f228b Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Wed, 31 Mar 2021 14:47:13 +0800 +Subject: [PATCH] hw/arm/smmuv3: Support 16K translation granule + +The driver can query some bits in SMMUv3 IDR5 to learn which +translation granules are supported. Arm recommends that SMMUv3 +implementations support at least 4K and 64K granules. But in +the vSMMUv3, there seems to be no reason not to support 16K +translation granule. In addition, if 16K is not supported, +vSVA will fail to be enabled in the future for 16K guest +kernel. So it would be better to support it. 
+ +Signed-off-by: Kunkun Jiang +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Signed-off-by: Peter Maydell +--- + hw/arm/smmuv3.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index e96d5beb9a..7911944c59 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -254,8 +254,9 @@ static void smmuv3_init_regs(SMMUv3State *s) + s->idr[1] = FIELD_DP32(s->idr[1], IDR1, EVENTQS, SMMU_EVENTQS); + s->idr[1] = FIELD_DP32(s->idr[1], IDR1, CMDQS, SMMU_CMDQS); + +- /* 4K and 64K granule support */ ++ /* 4K, 16K and 64K granule support */ + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN4K, 1); ++ s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN16K, 1); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, GRAN64K, 1); + s->idr[5] = FIELD_DP32(s->idr[5], IDR5, OAS, SMMU_IDR5_OAS); /* 44 bits */ + +@@ -480,7 +481,8 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) + + tg = CD_TG(cd, i); + tt->granule_sz = tg2granule(tg, i); +- if ((tt->granule_sz != 12 && tt->granule_sz != 16) || CD_ENDI(cd)) { ++ if ((tt->granule_sz != 12 && tt->granule_sz != 14 && ++ tt->granule_sz != 16) || CD_ENDI(cd)) { + goto bad_cd; + } + +-- +2.27.0 + diff --git a/hw-arm-virt-Add-memory-hotplug-framework.patch b/hw-arm-virt-Add-memory-hotplug-framework.patch new file mode 100644 index 0000000000000000000000000000000000000000..dcb0f21f5b63e10636d889f6cae99a4d738d1d0e --- /dev/null +++ b/hw-arm-virt-Add-memory-hotplug-framework.patch @@ -0,0 +1,130 @@ +From e14fadc66d488ad10a10a2076721b72cc239ded9 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 18 Sep 2019 14:06:26 +0100 +Subject: [PATCH] hw/arm/virt: Add memory hotplug framework + +This patch adds the memory hot-plug/hot-unplug infrastructure +in machvirt. The device memory is not yet exposed to the Guest +either through DT or ACPI and hence both cold/hot plug of memory +is explicitly disabled for now. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kwangwoo Lee +Signed-off-by: Shameer Kolothum +Reviewed-by: Peter Maydell +Reviewed-by: Igor Mammedov +Message-Id: <20190918130633.4872-5-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + hw/arm/Kconfig | 2 ++ + hw/arm/virt.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 54 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index ab65ecd216..84961c17ab 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -20,6 +20,8 @@ config ARM_VIRT + select SMBIOS + select VIRTIO_MMIO + select ACPI_PCI ++ select MEM_DEVICE ++ select DIMM + + config CHEETAH + bool +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 23d72aed97..c7c07fe3ac 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -65,6 +65,8 @@ + #include "hw/arm/smmuv3.h" + #include "hw/acpi/acpi.h" + #include "target/arm/internals.h" ++#include "hw/mem/pc-dimm.h" ++#include "hw/mem/nvdimm.h" + + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ +@@ -1998,6 +2000,42 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + return ms->possible_cpus; + } + ++static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ ++ /* ++ * The device memory is not yet exposed to the Guest either through ++ * DT or ACPI and hence both cold/hot plug of memory is explicitly ++ * disabled for now. 
++ */ ++ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { ++ error_setg(errp, "memory cold/hot plug is not yet supported"); ++ return; ++ } ++ ++ pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), NULL, errp); ++} ++ ++static void virt_memory_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ Error *local_err = NULL; ++ ++ pc_dimm_plug(PC_DIMM(dev), MACHINE(vms), &local_err); ++ ++ error_propagate(errp, local_err); ++} ++ ++static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { ++ virt_memory_pre_plug(hotplug_dev, dev, errp); ++ } ++} ++ + static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -2009,12 +2047,23 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, + SYS_BUS_DEVICE(dev)); + } + } ++ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { ++ virt_memory_plug(hotplug_dev, dev, errp); ++ } ++} ++ ++static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ error_setg(errp, "device unplug request for unsupported device" ++ " type: %s", object_get_typename(OBJECT(dev))); + } + + static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, + DeviceState *dev) + { +- if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) { ++ if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE) || ++ (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM))) { + return HOTPLUG_HANDLER(machine); + } + +@@ -2078,7 +2127,9 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + mc->kvm_type = virt_kvm_type; + assert(!mc->get_hotplug_handler); + mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; + hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request 
= virt_machine_device_unplug_request_cb; + mc->numa_mem_supported = true; + } + +-- +2.19.1 diff --git a/hw-arm-virt-Enable-device-memory-cold-hot-plug-with-.patch b/hw-arm-virt-Enable-device-memory-cold-hot-plug-with-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b32b2a01929189ff6f89e94f011d4d9cc3811a3b --- /dev/null +++ b/hw-arm-virt-Enable-device-memory-cold-hot-plug-with-.patch @@ -0,0 +1,252 @@ +From ce813d8daa2e01df52509f4bb52b9ab774408706 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 18 Sep 2019 14:06:27 +0100 +Subject: [PATCH] hw/arm/virt: Enable device memory cold/hot plug with ACPI + boot + +This initializes the GED device with base memory and irq, configures +ged memory hotplug event and builds the corresponding aml code. With +this, both hot and cold plug of device memory is enabled now for Guest +with ACPI boot. Memory cold plug support with Guest DT boot is not yet +supported. + +As DSDT table gets changed by this, update bios-tables-test-allowed-diff.h +to avoid "make check" failure. + +Signed-off-by: Shameer Kolothum +Message-Id: <20190918130633.4872-6-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Igor Mammedov +--- + hw/arm/Kconfig | 2 ++ + hw/arm/virt-acpi-build.c | 21 ++++++++++++++ + hw/arm/virt.c | 59 +++++++++++++++++++++++++++++++++++----- + include/hw/arm/virt.h | 4 +++ + 4 files changed, 79 insertions(+), 7 deletions(-) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 84961c17ab..ad7f7c089b 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -22,6 +22,8 @@ config ARM_VIRT + select ACPI_PCI + select MEM_DEVICE + select DIMM ++ select ACPI_MEMORY_HOTPLUG ++ select ACPI_HW_REDUCED + + config CHEETAH + bool +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index fe54411f6a..fca53ae01f 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -40,6 +40,8 @@ + #include "hw/acpi/aml-build.h" + #include "hw/acpi/utils.h" + #include "hw/acpi/pci.h" ++#include "hw/acpi/memory_hotplug.h" ++#include "hw/acpi/generic_event_device.h" + #include "hw/pci/pcie_host.h" + #include "hw/pci/pci.h" + #include "hw/arm/virt.h" +@@ -779,6 +781,7 @@ static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + Aml *scope, *dsdt; ++ MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; + const int *irqmap = vms->irqmap; + +@@ -803,6 +806,24 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + vms->highmem, vms->highmem_ecam); + acpi_dsdt_add_gpio(scope, &memmap[VIRT_GPIO], + (irqmap[VIRT_GPIO] + ARM_SPI_BASE)); ++ if (vms->acpi_dev) { ++ build_ged_aml(scope, "\\_SB."GED_DEVICE, ++ HOTPLUG_HANDLER(vms->acpi_dev), ++ irqmap[VIRT_ACPI_GED] + ARM_SPI_BASE, AML_SYSTEM_MEMORY, ++ memmap[VIRT_ACPI_GED].base); ++ } ++ ++ if (vms->acpi_dev) { ++ uint32_t event = object_property_get_uint(OBJECT(vms->acpi_dev), ++ "ged-event", &error_abort); ++ ++ if (event & ACPI_GED_MEM_HOTPLUG_EVT) { ++ build_memory_hotplug_aml(scope, ms->ram_slots, "\\_SB", NULL, ++ AML_SYSTEM_MEMORY, ++ memmap[VIRT_PCDIMM_ACPI].base); ++ } ++ } ++ + 
acpi_dsdt_add_power_button(scope); + + aml_append(dsdt, scope); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c7c07fe3ac..8ccabd5159 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -67,6 +67,7 @@ + #include "target/arm/internals.h" + #include "hw/mem/pc-dimm.h" + #include "hw/mem/nvdimm.h" ++#include "hw/acpi/generic_event_device.h" + + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ +@@ -137,6 +138,8 @@ static const MemMapEntry base_memmap[] = { + [VIRT_GPIO] = { 0x09030000, 0x00001000 }, + [VIRT_SECURE_UART] = { 0x09040000, 0x00001000 }, + [VIRT_SMMU] = { 0x09050000, 0x00020000 }, ++ [VIRT_PCDIMM_ACPI] = { 0x09070000, MEMORY_HOTPLUG_IO_LEN }, ++ [VIRT_ACPI_GED] = { 0x09080000, ACPI_GED_EVT_SEL_LEN }, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ +@@ -173,6 +176,7 @@ static const int a15irqmap[] = { + [VIRT_PCIE] = 3, /* ... 
to 6 */ + [VIRT_GPIO] = 7, + [VIRT_SECURE_UART] = 8, ++ [VIRT_ACPI_GED] = 9, + [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */ + [VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */ + [VIRT_SMMU] = 74, /* ...to 74 + NUM_SMMU_IRQS - 1 */ +@@ -630,6 +634,29 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms) + } + } + ++static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) ++{ ++ DeviceState *dev; ++ MachineState *ms = MACHINE(vms); ++ int irq = vms->irqmap[VIRT_ACPI_GED]; ++ uint32_t event = 0; ++ ++ if (ms->ram_slots) { ++ event = ACPI_GED_MEM_HOTPLUG_EVT; ++ } ++ ++ dev = qdev_create(NULL, TYPE_ACPI_GED); ++ qdev_prop_set_uint32(dev, "ged-event", event); ++ ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); ++ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irq]); ++ ++ qdev_init_nofail(dev); ++ ++ return dev; ++} ++ + static void create_its(VirtMachineState *vms, DeviceState *gicdev) + { + const char *itsclass = its_class_name(); +@@ -1603,6 +1630,7 @@ static void machvirt_init(MachineState *machine) + MemoryRegion *ram = g_new(MemoryRegion, 1); + bool firmware_loaded; + bool aarch64 = true; ++ bool has_ged = !vmc->no_ged; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; + +@@ -1824,6 +1852,10 @@ static void machvirt_init(MachineState *machine) + + create_gpio(vms, pic); + ++ if (has_ged && aarch64 && firmware_loaded && acpi_enabled) { ++ vms->acpi_dev = create_acpi_ged(vms, pic); ++ } ++ + /* Create mmio transports, so the user can create virtio backends + * (which will be automatically plugged in to the transports). If + * no backend is created the transport will just sit harmlessly idle. 
+@@ -2003,14 +2035,17 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); + +- /* +- * The device memory is not yet exposed to the Guest either through +- * DT or ACPI and hence both cold/hot plug of memory is explicitly +- * disabled for now. +- */ +- if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { +- error_setg(errp, "memory cold/hot plug is not yet supported"); ++ if (is_nvdimm) { ++ error_setg(errp, "nvdimm is not yet supported"); ++ return; ++ } ++ ++ if (!vms->acpi_dev) { ++ error_setg(errp, ++ "memory hotplug is not enabled: missing acpi-ged device"); + return; + } + +@@ -2020,11 +2055,18 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + static void virt_memory_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { ++ HotplugHandlerClass *hhc; + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + Error *local_err = NULL; + + pc_dimm_plug(PC_DIMM(dev), MACHINE(vms), &local_err); ++ if (local_err) { ++ goto out; ++ } + ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &error_abort); ++out: + error_propagate(errp, local_err); + } + +@@ -2231,8 +2273,11 @@ DEFINE_VIRT_MACHINE_AS_LATEST(4, 1) + + static void virt_machine_4_0_options(MachineClass *mc) + { ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + virt_machine_4_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); ++ vmc->no_ged = true; + } + DEFINE_VIRT_MACHINE(4, 0) + +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index a9d6977afc..0350285136 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -78,6 +78,8 @@ enum { + VIRT_GPIO, + VIRT_SECURE_UART, + VIRT_SECURE_MEM, 
++ VIRT_PCDIMM_ACPI, ++ VIRT_ACPI_GED, + VIRT_LOWMEMMAP_LAST, + }; + +@@ -107,6 +109,7 @@ typedef struct { + bool claim_edge_triggered_timers; + bool smbios_old_sys_ver; + bool no_highmem_ecam; ++ bool no_ged; /* Machines < 4.1 has no support for ACPI GED device */ + bool kvm_no_adjvtime; + } VirtMachineClass; + +@@ -135,6 +138,7 @@ typedef struct { + uint32_t iommu_phandle; + int psci_conduit; + hwaddr highest_gpa; ++ DeviceState *acpi_dev; + } VirtMachineState; + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +-- +2.19.1 diff --git a/hw-arm-virt-Factor-out-some-CPU-init-codes-to-pre_pl.patch b/hw-arm-virt-Factor-out-some-CPU-init-codes-to-pre_pl.patch new file mode 100644 index 0000000000000000000000000000000000000000..1e3befaf62920ca12cae4c8ead6d731800ef79a8 --- /dev/null +++ b/hw-arm-virt-Factor-out-some-CPU-init-codes-to-pre_pl.patch @@ -0,0 +1,170 @@ +From 3a0af1446395e74476a763ca12713b28c099a144 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 6 Apr 2020 12:50:54 +0800 +Subject: [PATCH] hw/arm/virt: Factor out some CPU init codes to pre_plug hook + +The init path of hotplugged CPU is pre_plug/realize/plug, so we +must move these init code in machvirt_init to pre_plug hook, to +let them be shared by all CPUs. 
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 108 +++++++++++++++++++++++++++----------------------- + 1 file changed, 58 insertions(+), 50 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 64532b61b2..83f4887e57 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -196,6 +196,8 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("max"), + }; + ++static MemoryRegion *secure_sysmem; ++ + static bool cpu_type_valid(const char *cpu) + { + int i; +@@ -1629,7 +1631,6 @@ static void machvirt_init(MachineState *machine) + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; + MemoryRegion *sysmem = get_system_memory(); +- MemoryRegion *secure_sysmem = NULL; + int n, virt_max_cpus; + MemoryRegion *ram = g_new(MemoryRegion, 1); + bool firmware_loaded; +@@ -1752,57 +1753,10 @@ static void machvirt_init(MachineState *machine) + } + + cpuobj = object_new(possible_cpus->cpus[n].type); +- object_property_set_int(cpuobj, possible_cpus->cpus[n].arch_id, +- "mp-affinity", NULL); ++ aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); + + cs = CPU(cpuobj); + cs->cpu_index = n; +- +- numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), +- &error_fatal); +- +- aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); +- +- if (!vms->secure) { +- object_property_set_bool(cpuobj, false, "has_el3", NULL); +- } +- +- if (!vms->virt && object_property_find(cpuobj, "has_el2", NULL)) { +- object_property_set_bool(cpuobj, false, "has_el2", NULL); +- } +- +- if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { +- object_property_set_int(cpuobj, vms->psci_conduit, +- "psci-conduit", NULL); +- +- /* Secondary CPUs start in PSCI powered-down state */ +- if (n > 0) { +- object_property_set_bool(cpuobj, true, +- "start-powered-off", NULL); +- } +- } +- +- if (vmc->kvm_no_adjvtime && +- object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { +- object_property_set_bool(cpuobj, 
true, "kvm-no-adjvtime", NULL); +- } +- +- if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { +- object_property_set_bool(cpuobj, false, "pmu", NULL); +- } +- +- if (object_property_find(cpuobj, "reset-cbar", NULL)) { +- object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base, +- "reset-cbar", &error_abort); +- } +- +- object_property_set_link(cpuobj, OBJECT(sysmem), "memory", +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, OBJECT(secure_sysmem), +- "secure-memory", &error_abort); +- } +- + object_property_set_bool(cpuobj, true, "realized", &error_fatal); + object_unref(cpuobj); + } +@@ -2089,10 +2043,16 @@ out: + static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- CPUState *cs = CPU(dev); + ARMCPUTopoInfo topo; ++ Object *cpuobj = OBJECT(dev); ++ CPUState *cs = CPU(dev); + ARMCPU *cpu = ARM_CPU(dev); + MachineState *ms = MACHINE(hotplug_dev); ++ MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(hotplug_dev); ++ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); ++ MemoryRegion *sysmem = get_system_memory(); + int smp_cores = ms->smp.cores; + int smp_threads = ms->smp.threads; + +@@ -2145,6 +2105,54 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, + return; + } + cpu->thread_id = topo.smt_id; ++ ++ /* Init some properties */ ++ ++ object_property_set_int(cpuobj, possible_cpus->cpus[cs->cpu_index].arch_id, ++ "mp-affinity", NULL); ++ ++ numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), ++ &error_fatal); ++ ++ if (!vms->secure) { ++ object_property_set_bool(cpuobj, false, "has_el3", NULL); ++ } ++ ++ if (!vms->virt && object_property_find(cpuobj, "has_el2", NULL)) { ++ object_property_set_bool(cpuobj, false, "has_el2", NULL); ++ } ++ ++ if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { ++ 
object_property_set_int(cpuobj, vms->psci_conduit, ++ "psci-conduit", NULL); ++ ++ /* Secondary CPUs start in PSCI powered-down state */ ++ if (cs->cpu_index > 0) { ++ object_property_set_bool(cpuobj, true, ++ "start-powered-off", NULL); ++ } ++ } ++ ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { ++ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); ++ } ++ ++ if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { ++ object_property_set_bool(cpuobj, false, "pmu", NULL); ++ } ++ ++ if (object_property_find(cpuobj, "reset-cbar", NULL)) { ++ object_property_set_int(cpuobj, vms->memmap[VIRT_CPUPERIPHS].base, ++ "reset-cbar", &error_abort); ++ } ++ ++ object_property_set_link(cpuobj, OBJECT(sysmem), "memory", ++ &error_abort); ++ if (vms->secure) { ++ object_property_set_link(cpuobj, OBJECT(secure_sysmem), ++ "secure-memory", &error_abort); ++ } + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, +-- +2.19.1 diff --git a/hw-arm-virt-Init-PMU-for-hotplugged-vCPU.patch b/hw-arm-virt-Init-PMU-for-hotplugged-vCPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..c124df5394121fdb0415b3b85d04fc3417a747aa --- /dev/null +++ b/hw-arm-virt-Init-PMU-for-hotplugged-vCPU.patch @@ -0,0 +1,73 @@ +From acc5162f1d1591ee4830f9b67934fc6d8a9ebbc1 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 8 Sep 2020 22:09:44 +0800 +Subject: [PATCH] hw/arm/virt: Init PMU for hotplugged vCPU + +Factor out PMU init code from fdt_add_pmu_nodes and +do PMU init for hotplugged vCPU. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 29 +++++++++++++++++++++-------- + 1 file changed, 21 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 7afc6c5e..7506d0ff 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -605,6 +605,23 @@ static void fdt_add_gic_node(VirtMachineState *vms) + g_free(nodename); + } + ++static bool virt_cpu_init_pmu(const VirtMachineState *vms, CPUState *cpu) ++{ ++ ARMCPU *armcpu = ARM_CPU(cpu); ++ ++ if (!arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { ++ return false; ++ } ++ if (kvm_enabled()) { ++ if (kvm_irqchip_in_kernel()) { ++ kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ)); ++ } ++ kvm_arm_pmu_init(cpu); ++ } ++ ++ return true; ++} ++ + static void fdt_add_pmu_nodes(const VirtMachineState *vms) + { + CPUState *cpu; +@@ -612,16 +629,9 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms) + uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI; + + CPU_FOREACH(cpu) { +- armcpu = ARM_CPU(cpu); +- if (!arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { ++ if (!virt_cpu_init_pmu(vms, cpu)) { + return; + } +- if (kvm_enabled()) { +- if (kvm_irqchip_in_kernel()) { +- kvm_arm_pmu_set_irq(cpu, PPI(VIRTUAL_PMU_IRQ)); +- } +- kvm_arm_pmu_init(cpu); +- } + } + + if (vms->gic_version == 2) { +@@ -2248,6 +2258,9 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, + agcc->cpu_hotplug_realize(gicv3, ncpu); + connect_gic_cpu_irqs(vms, ncpu); + ++ /* Init PMU part */ ++ virt_cpu_init_pmu(vms, cs); ++ + /* Register CPU reset and trigger it manually */ + cpu_synchronize_state(cs); + cpu_hotplug_register_reset(ncpu); +-- +2.23.0 + + diff --git a/hw-arm-virt-Introduce-cpu-topology-support.patch b/hw-arm-virt-Introduce-cpu-topology-support.patch index 027a5112effa77c9d37642f88cf9d00120bc6f47..932f467fe274453668edf80bac5d023817d6f95b 100644 --- a/hw-arm-virt-Introduce-cpu-topology-support.patch +++ b/hw-arm-virt-Introduce-cpu-topology-support.patch @@ -1,6 +1,6 @@ -From 123b4eb3cb7b9b4e3e0705a9b5f974b37d3b8431 
Mon Sep 17 00:00:00 2001 -From: zhanghailiang -Date: Mon, 5 Aug 2019 15:04:31 +0800 +From 73fc4af05ebe12d77915e6b3c85c48f5e0c432f3 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 22 Apr 2020 19:23:27 +0800 Subject: [PATCH] hw/arm/virt: Introduce cpu topology support Add topology support for guest vcpu by cpu-map in dtb when the guest is booted @@ -11,13 +11,13 @@ Signed-off-by: zhanghailiang (picked-from https://patchwork.ozlabs.org/cover/939301/ which is pushed by Andrew Jones ) --- - device_tree.c | 32 +++++++++++++++++++++++ - hw/acpi/aml-build.c | 50 ++++++++++++++++++++++++++++++++++++ + device_tree.c | 32 ++++++++++++++++++++++ + hw/acpi/aml-build.c | 53 ++++++++++++++++++++++++++++++++++++ hw/arm/virt-acpi-build.c | 4 +++ - hw/arm/virt.c | 29 +++++++++++++++++++++ + hw/arm/virt.c | 32 +++++++++++++++++++++- include/hw/acpi/aml-build.h | 2 ++ include/sysemu/device_tree.h | 1 + - 6 files changed, 118 insertions(+) + 6 files changed, 123 insertions(+), 1 deletion(-) diff --git a/device_tree.c b/device_tree.c index f8b46b3c..03906a14 100644 @@ -26,7 +26,7 @@ index f8b46b3c..03906a14 100644 @@ -524,6 +524,38 @@ int qemu_fdt_add_subnode(void *fdt, const char *name) return retval; } - + +/** + * qemu_fdt_add_path + * @fdt: Flattened Device Tree @@ -63,7 +63,7 @@ index f8b46b3c..03906a14 100644 { const char *dumpdtb = qemu_opt_get(qemu_get_machine_opts(), "dumpdtb"); diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index 73f97751..9d39ad10 100644 +index 73f97751..f2c8c28f 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -25,6 +25,7 @@ @@ -71,13 +71,13 @@ index 73f97751..9d39ad10 100644 #include "qemu/bitops.h" #include "sysemu/numa.h" +#include "sysemu/cpus.h" - + static GArray *build_alloc_array(void) { -@@ -51,6 +52,55 @@ static void build_append_array(GArray *array, GArray *val) +@@ -51,6 +52,58 @@ static void build_append_array(GArray *array, GArray *val) g_array_append_vals(array, val->data, val->len); } - + +/* + * ACPI 6.2 Processor 
Properties Topology Table (PPTT) + */ @@ -97,6 +97,9 @@ index 73f97751..9d39ad10 100644 +{ + int pptt_start = table_data->len; + int uid = 0, cpus = 0, socket; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cores = ms->smp.cores; ++ unsigned int smp_threads = ms->smp.threads; + + acpi_data_push(table_data, sizeof(AcpiTableHeader)); + @@ -128,28 +131,28 @@ index 73f97751..9d39ad10 100644 +} + #define ACPI_NAMESEG_LEN 4 - + static void diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c -index 33a8e2e3..18653e6d 100644 +index 29494ebd..fe54411f 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c -@@ -870,6 +870,10 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) +@@ -848,6 +848,10 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables_blob); build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); - + + acpi_add_table(table_offsets, tables_blob); + + build_pptt(tables_blob, tables->linker, vms->smp_cpus); + acpi_add_table(table_offsets, tables_blob); build_madt(tables_blob, tables->linker, vms); - + diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 57a78b16..16700a2e 100644 +index 0fa355ba..272455bc 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -42,6 +42,7 @@ +@@ -44,6 +44,7 @@ #include "net/net.h" #include "sysemu/device_tree.h" #include "sysemu/numa.h" @@ -157,10 +160,20 @@ index 57a78b16..16700a2e 100644 #include "sysemu/sysemu.h" #include "sysemu/kvm.h" #include "hw/loader.h" -@@ -364,8 +365,36 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) +@@ -312,7 +313,8 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + int cpu; + int addr_cells = 1; + const MachineState *ms = MACHINE(vms); +- ++ unsigned int smp_cores = ms->smp.cores; ++ unsigned int smp_threads = ms->smp.threads; + /* + * From Documentation/devicetree/bindings/arm/cpus.txt + * On ARM v8 64-bit systems value should be set to 2, +@@ -368,8 +370,36 
@@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) ms->possible_cpus->cpus[cs->cpu_index].props.node_id); } - + + qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(vms->fdt)); + @@ -192,7 +205,7 @@ index 57a78b16..16700a2e 100644 + g_free(cpu_path); + } } - + static void fdt_add_its_gic_node(VirtMachineState *vms) diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index 375335ab..bfb0b100 100644 @@ -201,11 +214,11 @@ index 375335ab..bfb0b100 100644 @@ -417,6 +417,8 @@ build_append_gas_from_struct(GArray *table, const struct AcpiGenericAddress *s) void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base, uint64_t len, int node, MemoryAffinityFlags flags); - + +void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus); + void build_slit(GArray *table_data, BIOSLinker *linker); - + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, diff --git a/include/sysemu/device_tree.h b/include/sysemu/device_tree.h index c16fd69b..d62fc873 100644 @@ -216,9 +229,8 @@ index c16fd69b..d62fc873 100644 int qemu_fdt_nop_node(void *fdt, const char *node_path); int qemu_fdt_add_subnode(void *fdt, const char *name); +int qemu_fdt_add_path(void *fdt, const char *path); - + #define qemu_fdt_setprop_cells(fdt, node_path, property, ...) 
\ do { \ --- -2.19.1 - +-- +2.23.0 diff --git a/hw-arm-virt-Simplify-by-moving-the-gic-in-the-machin.patch b/hw-arm-virt-Simplify-by-moving-the-gic-in-the-machin.patch new file mode 100644 index 0000000000000000000000000000000000000000..262cb508bcb8ba48bf93a3875957f2c9ace7698d --- /dev/null +++ b/hw-arm-virt-Simplify-by-moving-the-gic-in-the-machin.patch @@ -0,0 +1,402 @@ +From 5d1be90750551f1debf5767d7a6e2b9c50054c05 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Mon, 9 Dec 2019 10:03:06 +0100 +Subject: [PATCH] hw/arm/virt: Simplify by moving the gic in the machine state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Make the gic a field in the machine state, and instead of filling +an array of qemu_irq and passing it around, directly call +qdev_get_gpio_in() on the gic field. + +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Luc Michel +Message-id: 20191209090306.20433-1-philmd@redhat.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +--- + hw/arm/virt.c | 109 +++++++++++++++++++++--------------------- + include/hw/arm/virt.h | 1 + + 2 files changed, 55 insertions(+), 55 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 18321e522b..8638aeedb7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -634,7 +634,7 @@ static void fdt_add_pmu_nodes(const VirtMachineState *vms) + } + } + +-static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) ++static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + { + DeviceState *dev; + MachineState *ms = MACHINE(vms); +@@ -650,14 +650,14 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); +- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irq]); ++ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, 
qdev_get_gpio_in(vms->gic, irq)); + + qdev_init_nofail(dev); + + return dev; + } + +-static void create_its(VirtMachineState *vms, DeviceState *gicdev) ++static void create_its(VirtMachineState *vms) + { + const char *itsclass = its_class_name(); + DeviceState *dev; +@@ -669,7 +669,7 @@ static void create_its(VirtMachineState *vms, DeviceState *gicdev) + + dev = qdev_create(NULL, itsclass); + +- object_property_set_link(OBJECT(dev), OBJECT(gicdev), "parent-gicv3", ++ object_property_set_link(OBJECT(dev), OBJECT(vms->gic), "parent-gicv3", + &error_abort); + qdev_init_nofail(dev); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_GIC_ITS].base); +@@ -677,7 +677,7 @@ static void create_its(VirtMachineState *vms, DeviceState *gicdev) + fdt_add_its_gic_node(vms); + } + +-static void create_v2m(VirtMachineState *vms, qemu_irq *pic) ++static void create_v2m(VirtMachineState *vms) + { + int i; + int irq = vms->irqmap[VIRT_GIC_V2M]; +@@ -690,17 +690,17 @@ static void create_v2m(VirtMachineState *vms, qemu_irq *pic) + qdev_init_nofail(dev); + + for (i = 0; i < NUM_GICV2M_SPIS; i++) { +- sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]); ++ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, ++ qdev_get_gpio_in(vms->gic, irq + i)); + } + + fdt_add_v2m_gic_node(vms); + } + +-static void create_gic(VirtMachineState *vms, qemu_irq *pic) ++static void create_gic(VirtMachineState *vms) + { + MachineState *ms = MACHINE(vms); + /* We create a standalone GIC */ +- DeviceState *gicdev; + SysBusDevice *gicbusdev; + const char *gictype; + int type = vms->gic_version, i; +@@ -709,15 +709,15 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) + + gictype = (type == 3) ? 
gicv3_class_name() : gic_class_name(); + +- gicdev = qdev_create(NULL, gictype); +- qdev_prop_set_uint32(gicdev, "revision", type); +- qdev_prop_set_uint32(gicdev, "num-cpu", smp_cpus); ++ vms->gic = qdev_create(NULL, gictype); ++ qdev_prop_set_uint32(vms->gic, "revision", type); ++ qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); + /* Note that the num-irq property counts both internal and external + * interrupts; there are always 32 of the former (mandated by GIC spec). + */ +- qdev_prop_set_uint32(gicdev, "num-irq", NUM_IRQS + 32); ++ qdev_prop_set_uint32(vms->gic, "num-irq", NUM_IRQS + 32); + if (!kvm_irqchip_in_kernel()) { +- qdev_prop_set_bit(gicdev, "has-security-extensions", vms->secure); ++ qdev_prop_set_bit(vms->gic, "has-security-extensions", vms->secure); + } + + if (type == 3) { +@@ -727,25 +727,25 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) + + nb_redist_regions = virt_gicv3_redist_region_count(vms); + +- qdev_prop_set_uint32(gicdev, "len-redist-region-count", ++ qdev_prop_set_uint32(vms->gic, "len-redist-region-count", + nb_redist_regions); +- qdev_prop_set_uint32(gicdev, "redist-region-count[0]", redist0_count); ++ qdev_prop_set_uint32(vms->gic, "redist-region-count[0]", redist0_count); + + if (nb_redist_regions == 2) { + uint32_t redist1_capacity = + vms->memmap[VIRT_HIGH_GIC_REDIST2].size / GICV3_REDIST_SIZE; + +- qdev_prop_set_uint32(gicdev, "redist-region-count[1]", ++ qdev_prop_set_uint32(vms->gic, "redist-region-count[1]", + MIN(smp_cpus - redist0_count, redist1_capacity)); + } + } else { + if (!kvm_irqchip_in_kernel()) { +- qdev_prop_set_bit(gicdev, "has-virtualization-extensions", ++ qdev_prop_set_bit(vms->gic, "has-virtualization-extensions", + vms->virt); + } + } +- qdev_init_nofail(gicdev); +- gicbusdev = SYS_BUS_DEVICE(gicdev); ++ qdev_init_nofail(vms->gic); ++ gicbusdev = SYS_BUS_DEVICE(vms->gic); + sysbus_mmio_map(gicbusdev, 0, vms->memmap[VIRT_GIC_DIST].base); + if (type == 3) { + sysbus_mmio_map(gicbusdev, 1, 
vms->memmap[VIRT_GIC_REDIST].base); +@@ -781,23 +781,23 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) + + for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { + qdev_connect_gpio_out(cpudev, irq, +- qdev_get_gpio_in(gicdev, ++ qdev_get_gpio_in(vms->gic, + ppibase + timer_irq[irq])); + } + + if (type == 3) { +- qemu_irq irq = qdev_get_gpio_in(gicdev, ++ qemu_irq irq = qdev_get_gpio_in(vms->gic, + ppibase + ARCH_GIC_MAINT_IRQ); + qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", + 0, irq); + } else if (vms->virt) { +- qemu_irq irq = qdev_get_gpio_in(gicdev, ++ qemu_irq irq = qdev_get_gpio_in(vms->gic, + ppibase + ARCH_GIC_MAINT_IRQ); + sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); + } + + qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +- qdev_get_gpio_in(gicdev, ppibase ++ qdev_get_gpio_in(vms->gic, ppibase + + VIRTUAL_PMU_IRQ)); + + sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +@@ -809,20 +809,16 @@ static void create_gic(VirtMachineState *vms, qemu_irq *pic) + qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); + } + +- for (i = 0; i < NUM_IRQS; i++) { +- pic[i] = qdev_get_gpio_in(gicdev, i); +- } +- + fdt_add_gic_node(vms); + + if (type == 3 && vms->its) { +- create_its(vms, gicdev); ++ create_its(vms); + } else if (type == 2) { +- create_v2m(vms, pic); ++ create_v2m(vms); + } + } + +-static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart, ++static void create_uart(const VirtMachineState *vms, int uart, + MemoryRegion *mem, Chardev *chr) + { + char *nodename; +@@ -838,7 +834,7 @@ static void create_uart(const VirtMachineState *vms, qemu_irq *pic, int uart, + qdev_init_nofail(dev); + memory_region_add_subregion(mem, base, + sysbus_mmio_get_region(s, 0)); +- sysbus_connect_irq(s, 0, pic[irq]); ++ sysbus_connect_irq(s, 0, qdev_get_gpio_in(vms->gic, irq)); + + nodename = g_strdup_printf("/pl011@%" PRIx64, base); + qemu_fdt_add_subnode(vms->fdt, nodename); +@@ -880,7 +876,7 @@ 
static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem) + memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0)); + } + +-static void create_rtc(const VirtMachineState *vms, qemu_irq *pic) ++static void create_rtc(const VirtMachineState *vms) + { + char *nodename; + hwaddr base = vms->memmap[VIRT_RTC].base; +@@ -888,7 +884,7 @@ static void create_rtc(const VirtMachineState *vms, qemu_irq *pic) + int irq = vms->irqmap[VIRT_RTC]; + const char compat[] = "arm,pl031\0arm,primecell"; + +- sysbus_create_simple("pl031", base, pic[irq]); ++ sysbus_create_simple("pl031", base, qdev_get_gpio_in(vms->gic, irq)); + + nodename = g_strdup_printf("/pl031@%" PRIx64, base); + qemu_fdt_add_subnode(vms->fdt, nodename); +@@ -916,7 +912,7 @@ static void virt_powerdown_req(Notifier *n, void *opaque) + } + } + +-static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) ++static void create_gpio(const VirtMachineState *vms) + { + char *nodename; + DeviceState *pl061_dev; +@@ -925,7 +921,8 @@ static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) + int irq = vms->irqmap[VIRT_GPIO]; + const char compat[] = "arm,pl061\0arm,primecell"; + +- pl061_dev = sysbus_create_simple("pl061", base, pic[irq]); ++ pl061_dev = sysbus_create_simple("pl061", base, ++ qdev_get_gpio_in(vms->gic, irq)); + + uint32_t phandle = qemu_fdt_alloc_phandle(vms->fdt); + nodename = g_strdup_printf("/pl061@%" PRIx64, base); +@@ -959,7 +956,7 @@ static void create_gpio(const VirtMachineState *vms, qemu_irq *pic) + g_free(nodename); + } + +-static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic) ++static void create_virtio_devices(const VirtMachineState *vms) + { + int i; + hwaddr size = vms->memmap[VIRT_MMIO].size; +@@ -995,7 +992,8 @@ static void create_virtio_devices(const VirtMachineState *vms, qemu_irq *pic) + int irq = vms->irqmap[VIRT_MMIO] + i; + hwaddr base = vms->memmap[VIRT_MMIO].base + i * size; + +- sysbus_create_simple("virtio-mmio", 
base, pic[irq]); ++ sysbus_create_simple("virtio-mmio", base, ++ qdev_get_gpio_in(vms->gic, irq)); + } + + /* We add dtb nodes in reverse order so that they appear in the finished +@@ -1244,7 +1242,7 @@ static void create_pcie_irq_map(const VirtMachineState *vms, + 0x7 /* PCI irq */); + } + +-static void create_smmu(const VirtMachineState *vms, qemu_irq *pic, ++static void create_smmu(const VirtMachineState *vms, + PCIBus *bus) + { + char *node; +@@ -1267,7 +1265,8 @@ static void create_smmu(const VirtMachineState *vms, qemu_irq *pic, + qdev_init_nofail(dev); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, base); + for (i = 0; i < NUM_SMMU_IRQS; i++) { +- sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]); ++ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, ++ qdev_get_gpio_in(vms->gic, irq + i)); + } + + node = g_strdup_printf("/smmuv3@%" PRIx64, base); +@@ -1294,7 +1293,7 @@ static void create_smmu(const VirtMachineState *vms, qemu_irq *pic, + g_free(node); + } + +-static void create_pcie(VirtMachineState *vms, qemu_irq *pic) ++static void create_pcie(VirtMachineState *vms) + { + hwaddr base_mmio = vms->memmap[VIRT_PCIE_MMIO].base; + hwaddr size_mmio = vms->memmap[VIRT_PCIE_MMIO].size; +@@ -1354,7 +1353,8 @@ static void create_pcie(VirtMachineState *vms, qemu_irq *pic) + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 2, base_pio); + + for (i = 0; i < GPEX_NUM_IRQS; i++) { +- sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]); ++ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, ++ qdev_get_gpio_in(vms->gic, irq + i)); + gpex_set_irq_num(GPEX_HOST(dev), i, irq + i); + } + +@@ -1414,7 +1414,7 @@ static void create_pcie(VirtMachineState *vms, qemu_irq *pic) + if (vms->iommu) { + vms->iommu_phandle = qemu_fdt_alloc_phandle(vms->fdt); + +- create_smmu(vms, pic, pci->bus); ++ create_smmu(vms, pci->bus); + + qemu_fdt_setprop_cells(vms->fdt, nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, 0x10000); +@@ -1423,7 +1423,7 @@ static void create_pcie(VirtMachineState *vms, qemu_irq 
*pic) + g_free(nodename); + } + +-static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic) ++static void create_platform_bus(VirtMachineState *vms) + { + DeviceState *dev; + SysBusDevice *s; +@@ -1439,8 +1439,8 @@ static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic) + + s = SYS_BUS_DEVICE(dev); + for (i = 0; i < PLATFORM_BUS_NUM_IRQS; i++) { +- int irqn = vms->irqmap[VIRT_PLATFORM_BUS] + i; +- sysbus_connect_irq(s, i, pic[irqn]); ++ int irq = vms->irqmap[VIRT_PLATFORM_BUS] + i; ++ sysbus_connect_irq(s, i, qdev_get_gpio_in(vms->gic, irq)); + } + + memory_region_add_subregion(sysmem, +@@ -1621,7 +1621,6 @@ static void machvirt_init(MachineState *machine) + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; +- qemu_irq pic[NUM_IRQS]; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *secure_sysmem = NULL; + int n, virt_max_cpus; +@@ -1829,29 +1828,29 @@ static void machvirt_init(MachineState *machine) + + virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem); + +- create_gic(vms, pic); ++ create_gic(vms); + + fdt_add_pmu_nodes(vms); + +- create_uart(vms, pic, VIRT_UART, sysmem, serial_hd(0)); ++ create_uart(vms, VIRT_UART, sysmem, serial_hd(0)); + + create_cpufreq(vms, sysmem); + + if (vms->secure) { + create_secure_ram(vms, secure_sysmem); +- create_uart(vms, pic, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); ++ create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); + } + + vms->highmem_ecam &= vms->highmem && (!firmware_loaded || aarch64); + +- create_rtc(vms, pic); ++ create_rtc(vms); + +- create_pcie(vms, pic); ++ create_pcie(vms); + + if (has_ged && aarch64 && firmware_loaded && acpi_enabled) { +- vms->acpi_dev = create_acpi_ged(vms, pic); ++ vms->acpi_dev = create_acpi_ged(vms); + } else { +- create_gpio(vms, pic); ++ create_gpio(vms); + } + + /* connect powerdown request */ +@@ -1862,12 +1861,12 @@ static void 
machvirt_init(MachineState *machine) + * (which will be automatically plugged in to the transports). If + * no backend is created the transport will just sit harmlessly idle. + */ +- create_virtio_devices(vms, pic); ++ create_virtio_devices(vms); + + vms->fw_cfg = create_fw_cfg(vms, &address_space_memory); + rom_set_fw(vms->fw_cfg); + +- create_platform_bus(vms, pic); ++ create_platform_bus(vms); + + vms->bootinfo.ram_size = machine->ram_size; + vms->bootinfo.kernel_filename = machine->kernel_filename; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index dcceb9c615..3dfefca93b 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -138,6 +138,7 @@ typedef struct { + uint32_t iommu_phandle; + int psci_conduit; + hwaddr highest_gpa; ++ DeviceState *gic; + DeviceState *acpi_dev; + Notifier powerdown_notifier; + } VirtMachineState; +-- +2.19.1 diff --git a/hw-arm-virt-acpi-build-Add-PC-DIMM-in-SRAT.patch b/hw-arm-virt-acpi-build-Add-PC-DIMM-in-SRAT.patch new file mode 100644 index 0000000000000000000000000000000000000000..0602ab8d4d0d7af63f034c9b66c984e5aed627a7 --- /dev/null +++ b/hw-arm-virt-acpi-build-Add-PC-DIMM-in-SRAT.patch @@ -0,0 +1,75 @@ +From 8d287871fd4e1b4654fe9e5011b80614cb44f6d8 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 18 Sep 2019 14:06:28 +0100 +Subject: [PATCH] hw/arm/virt-acpi-build: Add PC-DIMM in SRAT + +Generate Memory Affinity Structures for PC-DIMM ranges. + +Also, Linux and Windows need ACPI SRAT table to make memory hotplug +work properly, however currently QEMU doesn't create SRAT table if +numa options aren't present on CLI. Hence add support(>=4.2) to +create numa node automatically (auto_enable_numa_with_memhp) when +QEMU is started with memory hotplug enabled but without '-numa' +options on CLI. 
+ +Signed-off-by: Shameer Kolothum +Signed-off-by: Eric Auger +Reviewed-by: Igor Mammedov +Message-Id: <20190918130633.4872-7-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + hw/arm/virt-acpi-build.c | 9 +++++++++ + hw/arm/virt.c | 2 ++ + 2 files changed, 11 insertions(+) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index fca53ae01f..9622994e50 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -592,6 +592,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + int i, srat_start; + uint64_t mem_base; + MachineClass *mc = MACHINE_GET_CLASS(vms); ++ MachineState *ms = MACHINE(vms); + const CPUArchIdList *cpu_list = mc->possible_cpu_arch_ids(MACHINE(vms)); + + srat_start = table_data->len; +@@ -617,6 +618,14 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + } + } + ++ if (ms->device_memory) { ++ numamem = acpi_data_push(table_data, sizeof *numamem); ++ build_srat_memory(numamem, ms->device_memory->base, ++ memory_region_size(&ms->device_memory->mr), ++ nb_numa_nodes - 1, ++ MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); ++ } ++ + build_header(linker, table_data, (void *)(table_data->data + srat_start), + "SRAT", table_data->len - srat_start, 3, NULL, NULL); + } +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8ccabd5159..ab33cce4b3 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2173,6 +2173,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + hc->plug = virt_machine_device_plug_cb; + hc->unplug_request = virt_machine_device_unplug_request_cb; + mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memhp = true; + } + + static void virt_instance_init(Object *obj) +@@ -2278,6 +2279,7 @@ static void virt_machine_4_0_options(MachineClass *mc) + virt_machine_4_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); + 
vmc->no_ged = true; ++ mc->auto_enable_numa_with_memhp = false; + } + DEFINE_VIRT_MACHINE(4, 0) + +-- +2.19.1 diff --git a/hw-arm-virt-add-missing-compat-for-kvm-no-adjvtime.patch b/hw-arm-virt-add-missing-compat-for-kvm-no-adjvtime.patch new file mode 100644 index 0000000000000000000000000000000000000000..3d711678a6bbd365da89b3039509259f9ffe3c2e --- /dev/null +++ b/hw-arm-virt-add-missing-compat-for-kvm-no-adjvtime.patch @@ -0,0 +1,25 @@ +From fbcb4ffa8648d0aa5be01c11816423a483f245ae Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Tue, 26 May 2020 22:39:23 +0800 +Subject: [PATCH] hw/arm/virt: add missing compat for kvm-no-adjvtime + +Machine compatibility for kvm-no-adjvtime is missed, +let's add it for virt machine 4.0 + +Signed-off-by: Ying Fang + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 4c727939..133d36a4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2492,6 +2492,7 @@ static void virt_machine_4_0_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); + vmc->no_ged = true; + mc->auto_enable_numa_with_memhp = false; ++ vmc->kvm_no_adjvtime = true; + } + DEFINE_VIRT_MACHINE(4, 0) + +-- +2.23.0 + diff --git a/hw-arm-virt-vTPM-support.patch b/hw-arm-virt-vTPM-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..cbdc68e62f88981b10ebd459ce101caaf10dba6d --- /dev/null +++ b/hw-arm-virt-vTPM-support.patch @@ -0,0 +1,141 @@ +From 443ebab9c299b04f020a6873454facb078723141 Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Thu, 13 Aug 2020 20:01:10 +0800 +Subject: [PATCH 15/19] hw/arm/virt: vTPM support + +Let the TPM TIS SYSBUS device be dynamically instantiable +in ARM virt. A device tree node is dynamically created +(TPM via MMIO). + +The TPM Physical Presence interface (PPI) is not supported. 
+ +To run with the swtmp TPM emulator, the qemu command line must +be augmented with: + + -chardev socket,id=chrtpm,path=swtpm-sock + -tpmdev emulator,id=tpm0,chardev=chrtpm + -device tpm-tis-device,tpmdev=tpm0 + +swtpm/libtpms command line example: + +swtpm socket --tpm2 -t -d --tpmstate dir=/tmp/tpm +--ctrl type=unixio,path=swtpm-sock + +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Tested-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Message-id: 20200305165149.618-7-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + hw/arm/Kconfig | 1 + + hw/arm/sysbus-fdt.c | 33 +++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 7 +++++++ + 3 files changed, 41 insertions(+) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 15e18b0a..06e49f26 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -5,6 +5,7 @@ config ARM_VIRT + imply VFIO_AMD_XGBE + imply VFIO_PLATFORM + imply VFIO_XGMAC ++ imply TPM_TIS_SYSBUS + select A15MPCORE + select ACPI + select ARM_SMMUV3 +diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c +index 57f94e65..c725d325 100644 +--- a/hw/arm/sysbus-fdt.c ++++ b/hw/arm/sysbus-fdt.c +@@ -30,6 +30,7 @@ + #include "hw/arm/sysbus-fdt.h" + #include "qemu/error-report.h" + #include "sysemu/device_tree.h" ++#include "sysemu/tpm.h" + #include "hw/platform-bus.h" + #include "sysemu/sysemu.h" + #include "hw/vfio/vfio-platform.h" +@@ -437,6 +438,37 @@ static bool vfio_platform_match(SysBusDevice *sbdev, + + #endif /* CONFIG_LINUX */ + ++/* ++ * add_tpm_tis_fdt_node: Create a DT node for TPM TIS ++ * ++ * See kernel documentation: ++ * Documentation/devicetree/bindings/security/tpm/tpm_tis_mmio.txt ++ * Optional interrupt for command completion is not exposed ++ */ ++static int add_tpm_tis_fdt_node(SysBusDevice *sbdev, void *opaque) ++{ ++ PlatformBusFDTData *data = opaque; ++ PlatformBusDevice *pbus = data->pbus; ++ void *fdt = data->fdt; ++ const char *parent_node = data->pbus_node_name; ++ char *nodename; ++ 
uint32_t reg_attr[2]; ++ uint64_t mmio_base; ++ ++ mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0); ++ nodename = g_strdup_printf("%s/tpm_tis@%" PRIx64, parent_node, mmio_base); ++ qemu_fdt_add_subnode(fdt, nodename); ++ ++ qemu_fdt_setprop_string(fdt, nodename, "compatible", "tcg,tpm-tis-mmio"); ++ ++ reg_attr[0] = cpu_to_be32(mmio_base); ++ reg_attr[1] = cpu_to_be32(0x5000); ++ qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, 2 * sizeof(uint32_t)); ++ ++ g_free(nodename); ++ return 0; ++} ++ + static int no_fdt_node(SysBusDevice *sbdev, void *opaque) + { + return 0; +@@ -457,6 +489,7 @@ static const BindingEntry bindings[] = { + TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node), + VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node), + #endif ++ TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node), + TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node), + TYPE_BINDING("", NULL), /* last element */ + }; +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 133d36a4..7afc6c5e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -47,6 +47,7 @@ + #include "sysemu/numa.h" + #include "sysemu/cpus.h" + #include "sysemu/sysemu.h" ++#include "sysemu/tpm.h" + #include "sysemu/kvm.h" + #include "sysemu/cpus.h" + #include "sysemu/hw_accel.h" +@@ -2368,6 +2369,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM); ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); + mc->block_default_type = IF_VIRTIO; + mc->no_cdrom = 1; + mc->pci_allow_0_address = true; +@@ -2481,6 +2483,11 @@ type_init(machvirt_machine_init); + + static void virt_machine_4_1_options(MachineClass *mc) + { ++ static GlobalProperty compat[] = { ++ { TYPE_TPM_TIS_SYSBUS, "ppi", "false" }, ++ }; ++ ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); 
+ } + DEFINE_VIRT_MACHINE_AS_LATEST(4, 1) + +-- +2.23.0 + diff --git a/hw-arm64-add-vcpu-cache-info-support.patch b/hw-arm64-add-vcpu-cache-info-support.patch index c9e843719b59a99112ee1867475846378c84264e..79e1dede39def063dc9d8a4f4b87339bcd39c435 100644 --- a/hw-arm64-add-vcpu-cache-info-support.patch +++ b/hw-arm64-add-vcpu-cache-info-support.patch @@ -1,6 +1,6 @@ -From 8db6d888e3eb131900111506b93f6101413df5b4 Mon Sep 17 00:00:00 2001 -From: zhanghailiang -Date: Mon, 5 Aug 2019 15:30:05 +0800 +From 5a0ed254f99ca37498bd81994b906b6984b5ffa9 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 22 Apr 2020 19:25:00 +0800 Subject: [PATCH] hw/arm64: add vcpu cache info support Support VCPU Cache info by dtb and PPTT table, including L1, L2 and L3 Cache. @@ -8,16 +8,16 @@ Support VCPU Cache info by dtb and PPTT table, including L1, L2 and L3 Cache. Signed-off-by: zhanghailiang Signed-off-by: Honghao --- - hw/acpi/aml-build.c | 124 ++++++++++++++++++++++++++++++++++++ - hw/arm/virt.c | 76 +++++++++++++++++++++- + hw/acpi/aml-build.c | 126 ++++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 80 ++++++++++++++++++++++- include/hw/acpi/aml-build.h | 46 +++++++++++++ - 3 files changed, 245 insertions(+), 1 deletion(-) + 3 files changed, 251 insertions(+), 1 deletion(-) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c -index 9d39ad10..99209c0a 100644 +index f2c8c28f..74e95005 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c -@@ -55,6 +55,129 @@ static void build_append_array(GArray *array, GArray *val) +@@ -55,6 +55,131 @@ static void build_append_array(GArray *array, GArray *val) /* * ACPI 6.2 Processor Properties Topology Table (PPTT) */ @@ -115,6 +115,8 @@ index 9d39ad10..99209c0a 100644 + int pptt_start = table_data->len; + int uid = 0, cpus = 0, socket; + struct offset_status offset; ++ const MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cores = ms->smp.cores; + + acpi_data_push(table_data, sizeof(AcpiTableHeader)); + @@ -147,25 
+149,27 @@ index 9d39ad10..99209c0a 100644 static void build_cpu_hierarchy(GArray *tbl, uint32_t flags, uint32_t parent, uint32_t id) { -@@ -100,6 +223,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) +@@ -103,6 +228,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, int possible_cpus) (void *)(table_data->data + pptt_start), "PPTT", table_data->len - pptt_start, 1, NULL, NULL); } +#endif - + #define ACPI_NAMESEG_LEN 4 - + diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 16700a2e..96f56e2e 100644 +index 272455bc..9669c70b 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -304,6 +304,77 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) +@@ -308,6 +308,81 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags); } - + +static void fdt_add_l3cache_nodes(const VirtMachineState *vms) +{ + int i; ++ const MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cores = ms->smp.cores; + unsigned int sockets = vms->smp_cpus / smp_cores; + + /* If current is not equal to max */ @@ -191,6 +195,8 @@ index 16700a2e..96f56e2e 100644 +static void fdt_add_l2cache_nodes(const VirtMachineState *vms) +{ + int i, j; ++ const MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cores = ms->smp.cores; + signed int sockets = vms->smp_cpus / smp_cores; + + /* If current is not equal to max */ @@ -237,17 +243,17 @@ index 16700a2e..96f56e2e 100644 static void fdt_add_cpu_nodes(const VirtMachineState *vms) { int cpu; -@@ -336,6 +407,9 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) +@@ -341,6 +416,9 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#address-cells", addr_cells); qemu_fdt_setprop_cell(vms->fdt, "/cpus", "#size-cells", 0x0); - + + fdt_add_l3cache_nodes(vms); + fdt_add_l2cache_nodes(vms); + for (cpu = vms->smp_cpus - 1; cpu >= 0; cpu--) { char *nodename = 
g_strdup_printf("/cpus/cpu@%d", cpu); ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); -@@ -364,7 +438,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) +@@ -369,7 +447,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) qemu_fdt_setprop_cell(vms->fdt, nodename, "numa-node-id", ms->possible_cpus->cpus[cs->cpu_index].props.node_id); } @@ -255,7 +261,7 @@ index 16700a2e..96f56e2e 100644 + fdt_add_l1cache_prop(vms, nodename, cpu); qemu_fdt_setprop_cell(vms->fdt, nodename, "phandle", qemu_fdt_alloc_phandle(vms->fdt)); - + diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index bfb0b100..0be3453a 100644 --- a/include/hw/acpi/aml-build.h @@ -263,7 +269,7 @@ index bfb0b100..0be3453a 100644 @@ -223,6 +223,52 @@ struct AcpiBuildTables { BIOSLinker *linker; } AcpiBuildTables; - + +#ifdef __aarch64__ +/* Definitions of the hardcoded cache info*/ + @@ -313,6 +319,5 @@ index bfb0b100..0be3453a 100644 /** * init_aml_allocator: * --- +-- 2.23.0 - diff --git a/hw-block-nvme-fix-pci-doorbell-size-calculation.patch b/hw-block-nvme-fix-pci-doorbell-size-calculation.patch new file mode 100644 index 0000000000000000000000000000000000000000..f0aa09670e471a344c220ae38b8f5ba43b263eaf --- /dev/null +++ b/hw-block-nvme-fix-pci-doorbell-size-calculation.patch @@ -0,0 +1,62 @@ +From 1aa42c9269c762ad1b7efa41e92f734b093dce1c Mon Sep 17 00:00:00 2001 +From: Klaus Jensen +Date: Tue, 9 Jun 2020 21:03:12 +0200 +Subject: [PATCH 10/11] hw/block/nvme: fix pci doorbell size calculation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The size of the BAR is 0x1000 (main registers) + 8 bytes for each +queue. Currently, the size of the BAR is calculated like so: + + n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); + +Since the 'num_queues' parameter already accounts for the admin queue, +this should in any case not need to be incremented by one. Also, the +size should be initialized to (0x1000). 
+ + n->reg_size = pow2ceil(0x1000 + 2 * n->num_queues * 4); + +This, with the default value of num_queues (64), we will set aside room +for 1 admin queue and 63 I/O queues (4 bytes per doorbell, 2 doorbells +per queue). + +Signed-off-by: Klaus Jensen +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Maxim Levitsky +Reviewed-by: Keith Busch +Message-Id: <20200609190333.59390-2-its@irrelevant.dk> +Signed-off-by: Kevin Wolf +Signed-off-by: BiaoXiang Ye +--- + hw/block/nvme.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/block/nvme.c b/hw/block/nvme.c +index 417068d8..edac2f1d 100644 +--- a/hw/block/nvme.c ++++ b/hw/block/nvme.c +@@ -42,6 +42,9 @@ + #include "trace.h" + #include "nvme.h" + ++#define NVME_REG_SIZE 0x1000 ++#define NVME_DB_SIZE 4 ++ + #define NVME_GUEST_ERR(trace, fmt, ...) \ + do { \ + (trace_##trace)(__VA_ARGS__); \ +@@ -1348,7 +1351,9 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) + pcie_endpoint_cap_init(pci_dev, 0x80); + + n->num_namespaces = 1; +- n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); ++ ++ /* num_queues is really number of pairs, so each has two doorbells */ ++ n->reg_size = pow2ceil(NVME_REG_SIZE + 2 * n->num_queues * NVME_DB_SIZE); + n->ns_size = bs_size / (uint64_t)n->num_namespaces; + + n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); +-- +2.27.0.dirty + diff --git a/hw-block-nvme-fix-pin-based-interrupt-behavior.patch b/hw-block-nvme-fix-pin-based-interrupt-behavior.patch new file mode 100644 index 0000000000000000000000000000000000000000..1fe1213d998869c0f87eabd5d75fc62c3750f06b --- /dev/null +++ b/hw-block-nvme-fix-pin-based-interrupt-behavior.patch @@ -0,0 +1,87 @@ +From 74ef18c90684f0ae18aef071b9e11a5e8796177b Mon Sep 17 00:00:00 2001 +From: alexchen +Date: Tue, 8 Sep 2020 11:17:20 +0000 +Subject: [PATCH] hw/block/nvme: fix pin-based interrupt behavior + +First, since the device only supports MSI-X or pin-based interrupt, if +MSI-X is not enabled, it should 
not accept interrupt vectors different +from 0 when creating completion queues. + +Secondly, the irq_status NvmeCtrl member is meant to be compared to the +INTMS register, so it should only be 32 bits wide. And it is really only +useful when used with multi-message MSI. + +Third, since we do not force a 1-to-1 correspondence between cqid and +interrupt vector, the irq_status register should not have bits set +according to cqid, but according to the associated interrupt vector. + +Fix these issues, but keep irq_status available so we can easily support +multi-message MSI down the line. + +Fixes: 5e9aa92eb1a5 ("hw/block: Fix pin-based interrupt behaviour of NVMe") +Cc: "Michael S. Tsirkin" +Cc: Marcel Apfelbaum +Signed-off-by: Klaus Jensen +Reviewed-by: Keith Busch +Message-Id: <20200609190333.59390-8-its@irrelevant.dk> +Signed-off-by: Kevin Wolf +Signed-off-by: BiaoXiang Ye +Signed-off-by: Zhenyu Ye +--- + hw/block/nvme.c | 12 ++++++++---- + hw/block/nvme.h | 2 +- + 2 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/hw/block/nvme.c b/hw/block/nvme.c +index 36d6a8bb..e35c2e10 100644 +--- a/hw/block/nvme.c ++++ b/hw/block/nvme.c +@@ -115,8 +115,8 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) + msix_notify(&(n->parent_obj), cq->vector); + } else { + trace_nvme_irq_pin(); +- assert(cq->cqid < 64); +- n->irq_status |= 1 << cq->cqid; ++ assert(cq->vector < 32); ++ n->irq_status |= 1 << cq->vector; + nvme_irq_check(n); + } + } else { +@@ -130,8 +130,8 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) + if (msix_enabled(&(n->parent_obj))) { + return; + } else { +- assert(cq->cqid < 64); +- n->irq_status &= ~(1 << cq->cqid); ++ assert(cq->vector < 32); ++ n->irq_status &= ~(1 << cq->vector); + nvme_irq_check(n); + } + } +@@ -630,6 +630,10 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) + trace_nvme_err_invalid_create_cq_addr(prp1); + return NVME_INVALID_FIELD | NVME_DNR; + } ++ if (unlikely(!msix_enabled(&n->parent_obj) && 
vector)) { ++ trace_nvme_err_invalid_create_cq_vector(vector); ++ return NVME_INVALID_IRQ_VECTOR | NVME_DNR; ++ } + if (unlikely(vector > n->num_queues)) { + trace_nvme_err_invalid_create_cq_vector(vector); + return NVME_INVALID_IRQ_VECTOR | NVME_DNR; +diff --git a/hw/block/nvme.h b/hw/block/nvme.h +index 557194ee..f4c1ff91 100644 +--- a/hw/block/nvme.h ++++ b/hw/block/nvme.h +@@ -78,7 +78,7 @@ typedef struct NvmeCtrl { + uint32_t cmbsz; + uint32_t cmbloc; + uint8_t *cmbuf; +- uint64_t irq_status; ++ uint32_t irq_status; + uint64_t host_timestamp; /* Timestamp sent by the host */ + uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ + +-- +2.23.0 + diff --git a/hw-core-loader-Fix-possible-crash-in-rom_copy.patch b/hw-core-loader-Fix-possible-crash-in-rom_copy.patch new file mode 100644 index 0000000000000000000000000000000000000000..770f12b1acf9dfc3c4289e9a9bea7d5936df1968 --- /dev/null +++ b/hw-core-loader-Fix-possible-crash-in-rom_copy.patch @@ -0,0 +1,45 @@ +From aae0faa5d3bee91c66dc4c1543190f55a242771e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 25 Sep 2019 14:16:43 +0200 +Subject: [PATCH] hw/core/loader: Fix possible crash in rom_copy() + +Both "rom->addr" and "addr" are derived from the binary image +that can be loaded with the "-kernel" parameter. The code in +rom_copy() then calculates: + + d = dest + (rom->addr - addr); + +and uses "d" as destination in a memcpy() some lines later. Now with +bad kernel images, it is possible that rom->addr is smaller than addr, +thus "rom->addr - addr" gets negative and the memcpy() then tries to +copy contents from the image to a bad memory location. This could +maybe be used to inject code from a kernel image into the QEMU binary, +so we better fix it with an additional sanity check here. + +Cc: qemu-stable@nongnu.org +Reported-by: Guangming Liu +Buglink: https://bugs.launchpad.net/qemu/+bug/1844635 +Message-Id: <20190925130331.27825-1-thuth@redhat.com> +Reviewed-by: Michael S.
Tsirkin +Signed-off-by: Thomas Huth +(cherry picked from commit e423455c4f23a1a828901c78fe6d03b7dde79319) +Signed-off-by: Michael Roth +--- + hw/core/loader.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/core/loader.c b/hw/core/loader.c +index 425bf69a99..838a34174a 100644 +--- a/hw/core/loader.c ++++ b/hw/core/loader.c +@@ -1242,7 +1242,7 @@ int rom_copy(uint8_t *dest, hwaddr addr, size_t size) + if (rom->addr + rom->romsize < addr) { + continue; + } +- if (rom->addr > end) { ++ if (rom->addr > end || rom->addr < addr) { + break; + } + +-- +2.23.0 diff --git a/hw-display-exynos4210_fimd-Fix-potential-NULL-pointe.patch b/hw-display-exynos4210_fimd-Fix-potential-NULL-pointe.patch new file mode 100644 index 0000000000000000000000000000000000000000..98e3c3bed9a221c978c8f733e5d587dc2803180b --- /dev/null +++ b/hw-display-exynos4210_fimd-Fix-potential-NULL-pointe.patch @@ -0,0 +1,46 @@ +From b47d7ad29bc7f30d4ea3fdb0ef86942468416b79 Mon Sep 17 00:00:00 2001 +From: AlexChen +Date: Mon, 2 Nov 2020 16:52:17 +0000 +Subject: [PATCH] hw/display/exynos4210_fimd: Fix potential NULL pointer + dereference +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In exynos4210_fimd_update(), the pointer s is dereferenced before +being checked for validity, which may lead to a NULL pointer dereference. +So move the assignment to global_width after checking that s is valid.
+ +Reported-by: Euler Robot +Signed-off-by: Alex Chen +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 5F9F8D88.9030102@huawei.com +Signed-off-by: Peter Maydell +(cherry-picked from commit 18520fa465) +--- + hw/display/exynos4210_fimd.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c +index 61f7408b1c..85b0ebf23a 100644 +--- a/hw/display/exynos4210_fimd.c ++++ b/hw/display/exynos4210_fimd.c +@@ -1271,12 +1271,14 @@ static void exynos4210_fimd_update(void *opaque) + bool blend = false; + uint8_t *host_fb_addr; + bool is_dirty = false; +- const int global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1; ++ int global_width; + + if (!s || !s->console || !s->enabled || + surface_bits_per_pixel(qemu_console_surface(s->console)) == 0) { + return; + } ++ ++ global_width = (s->vidtcon[2] & FIMD_VIDTCON2_SIZE_MASK) + 1; + exynos4210_update_resolution(s); + surface = qemu_console_surface(s->console); + +-- +2.27.0 + diff --git a/hw-display-omap_lcdc-Fix-potential-NULL-pointer-dere.patch b/hw-display-omap_lcdc-Fix-potential-NULL-pointer-dere.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f11b2d8bbc047a93dd11cf9c6a16eb757676f86 --- /dev/null +++ b/hw-display-omap_lcdc-Fix-potential-NULL-pointer-dere.patch @@ -0,0 +1,49 @@ +From 38697076a98034a078c2411234b8979cf3cec6da Mon Sep 17 00:00:00 2001 +From: AlexChen +Date: Mon, 2 Nov 2020 16:52:17 +0000 +Subject: [PATCH] hw/display/omap_lcdc: Fix potential NULL pointer dereference + +In omap_update_display(), the pointer omap_lcd is dereferenced before +being checked for validity, which may lead to a NULL pointer dereference. +So move the assignment to surface after checking that omap_lcd is valid +and move surface_bits_per_pixel(surface) to after the surface assignment.
+ +Reported-by: Euler Robot +Signed-off-by: AlexChen +Message-id: 5F9CDB8A.9000001@huawei.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry-picked from commit 0080edc45e) +--- + hw/display/omap_lcdc.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/display/omap_lcdc.c b/hw/display/omap_lcdc.c +index 07a5effe04..13ab73ec61 100644 +--- a/hw/display/omap_lcdc.c ++++ b/hw/display/omap_lcdc.c +@@ -77,14 +77,18 @@ static void omap_lcd_interrupts(struct omap_lcd_panel_s *s) + static void omap_update_display(void *opaque) + { + struct omap_lcd_panel_s *omap_lcd = (struct omap_lcd_panel_s *) opaque; +- DisplaySurface *surface = qemu_console_surface(omap_lcd->con); ++ DisplaySurface *surface; + draw_line_func draw_line; + int size, height, first, last; + int width, linesize, step, bpp, frame_offset; + hwaddr frame_base; + +- if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable || +- !surface_bits_per_pixel(surface)) { ++ if (!omap_lcd || omap_lcd->plm == 1 || !omap_lcd->enable) { ++ return; ++ } ++ ++ surface = qemu_console_surface(omap_lcd->con); ++ if (!surface_bits_per_pixel(surface)) { + return; + } + +-- +2.27.0 + diff --git a/hw-ehci-check-return-value-of-usb_packet_map.patch b/hw-ehci-check-return-value-of-usb_packet_map.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c05a2e61f8deef9a36ca8500cfabcb9736d14bc --- /dev/null +++ b/hw-ehci-check-return-value-of-usb_packet_map.patch @@ -0,0 +1,49 @@ +From 02d63f9fd9655f1899dabbccaf0568bfaa3e97df Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Wed, 12 Aug 2020 09:17:27 -0700 +Subject: [PATCH] hw: ehci: check return value of 'usb_packet_map' + +If 'usb_packet_map' fails, we should stop to process the usb +request. 
+ +Signed-off-by: Li Qiang +Message-Id: <20200812161727.29412-1-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann +(cherry-picked from 2fdb42d8) +Fix CVE-2020-25723 +Signed-off-by: Alex Chen +--- + hw/usb/hcd-ehci.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index 5f089f3005..433e6a4fc0 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -1370,7 +1370,10 @@ static int ehci_execute(EHCIPacket *p, const char *action) + spd = (p->pid == USB_TOKEN_IN && NLPTR_TBIT(p->qtd.altnext) == 0); + usb_packet_setup(&p->packet, p->pid, ep, 0, p->qtdaddr, spd, + (p->qtd.token & QTD_TOKEN_IOC) != 0); +- usb_packet_map(&p->packet, &p->sgl); ++ if (usb_packet_map(&p->packet, &p->sgl)) { ++ qemu_sglist_destroy(&p->sgl); ++ return -1; ++ } + p->async = EHCI_ASYNC_INITIALIZED; + } + +@@ -1449,7 +1452,10 @@ static int ehci_process_itd(EHCIState *ehci, + if (ep && ep->type == USB_ENDPOINT_XFER_ISOC) { + usb_packet_setup(&ehci->ipacket, pid, ep, 0, addr, false, + (itd->transact[i] & ITD_XACT_IOC) != 0); +- usb_packet_map(&ehci->ipacket, &ehci->isgl); ++ if (usb_packet_map(&ehci->ipacket, &ehci->isgl)) { ++ qemu_sglist_destroy(&ehci->isgl); ++ return -1; ++ } + usb_handle_packet(dev, &ehci->ipacket); + usb_packet_unmap(&ehci->ipacket, &ehci->isgl); + } else { +-- +2.27.0 + diff --git a/hw-ide-check-null-block-before-_cancel_dma_sync.patch b/hw-ide-check-null-block-before-_cancel_dma_sync.patch new file mode 100644 index 0000000000000000000000000000000000000000..1ff20a9683ec88de3d3a67086ffc82eedff9697e --- /dev/null +++ b/hw-ide-check-null-block-before-_cancel_dma_sync.patch @@ -0,0 +1,64 @@ +From 3b23698e240bd0efe987cf113e3bc8d233991d21 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Wed, 14 Oct 2020 15:57:18 +0800 +Subject: [PATCH] hw/ide: check null block before _cancel_dma_sync + +fix CVE-2020-25743 + +patch link: https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg05967.html + +When 
canceling an i/o operation via ide_cancel_dma_sync(), +a block pointer may be null. Add a check to avoid null pointer +dereference. + + -> https://ruhr-uni-bochum.sciebo.de/s/NNWP2GfwzYKeKwE?path=%2Fide_nullptr1 + ==1803100==Hint: address points to the zero page. + #0 blk_bs ../block/block-backend.c:714 + #1 blk_drain ../block/block-backend.c:1715 + #2 ide_cancel_dma_sync ../hw/ide/core.c:723 + #3 bmdma_cmd_writeb ../hw/ide/core.c:723 + #4 bmdma_write ../hw/ide/pci.c:298 + #5 memory_region_write_accessor ../softmmu/memory.c:483 + #6 access_with_adjusted_size ../softmmu/memory.c:544 + #7 memory_region_dispatch_write ../softmmu/memory.c:1465 + #8 flatview_write_continue ../exe.c:3176 + ... + +Reported-by: Ruhr-University +Signed-off-by: Prasad J Pandit +--- + hw/ide/core.c | 1 + + hw/ide/pci.c | 5 ++++- + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/ide/core.c b/hw/ide/core.c +index f76f7e5234..8105187f49 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -718,6 +718,7 @@ void ide_cancel_dma_sync(IDEState *s) + * whole DMA operation will be submitted to disk with a single + * aio operation with preadv/pwritev.
+ */ ++ assert(s->blk); + if (s->bus->dma->aiocb) { + trace_ide_cancel_dma_sync_remaining(); + blk_drain(s->blk); +diff --git a/hw/ide/pci.c b/hw/ide/pci.c +index b50091b615..b47e675456 100644 +--- a/hw/ide/pci.c ++++ b/hw/ide/pci.c +@@ -295,7 +295,10 @@ void bmdma_cmd_writeb(BMDMAState *bm, uint32_t val) + /* Ignore writes to SSBM if it keeps the old value */ + if ((val & BM_CMD_START) != (bm->cmd & BM_CMD_START)) { + if (!(val & BM_CMD_START)) { +- ide_cancel_dma_sync(idebus_active_if(bm->bus)); ++ IDEState *s = idebus_active_if(bm->bus); ++ if (s->blk) { ++ ide_cancel_dma_sync(s); ++ } + bm->status &= ~BM_STATUS_DMAING; + } else { + bm->cur_addr = bm->addr; +-- +2.23.0 + diff --git a/hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch b/hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch new file mode 100644 index 0000000000000000000000000000000000000000..de999b8c89a41d54879d2fd22cc3b852e6c16138 --- /dev/null +++ b/hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch @@ -0,0 +1,70 @@ +From 3e28567104500238b89ea6b4d684c5350194fea9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Mon, 21 Jun 2021 10:12:41 +0800 +Subject: [PATCH] hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2021-20221 + +Per the ARM Generic Interrupt Controller Architecture specification +(document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit, +not 10: + + - 4.3 Distributor register descriptions + - 4.3.15 Software Generated Interrupt Register, GICD_SG + + - Table 4-21 GICD_SGIR bit assignments + + The Interrupt ID of the SGI to forward to the specified CPU + interfaces. The value of this field is the Interrupt ID, in + the range 0-15, for example a value of 0b0011 specifies + Interrupt ID 3. 
+ +Correct the irq mask to fix an undefined behavior (which eventually +lead to a heap-buffer-overflow, see [Buglink]): + + $ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M virt,accel=qtest -qtest stdio + [I 1612088147.116987] OPENED + [R +0.278293] writel 0x8000f00 0xff4affb0 + ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for type 'uint8_t [16][8]' + SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ../hw/intc/arm_gic.c:1498:13 + +This fixes a security issue when running with KVM on Arm with +kernel-irqchip=off. (The default is kernel-irqchip=on, which is +unaffected, and which is also the correct choice for performance.) + +Cc: qemu-stable@nongnu.org +Fixes: CVE-2021-20221 +Fixes: 9ee6e8bb ("ARMv7 support.") +Buglink: https://bugs.launchpad.net/qemu/+bug/1913916 +Buglink: https://bugs.launchpad.net/qemu/+bug/1913917 + +Reported-by: Alexander Bulekov's avatarAlexander Bulekov +Signed-off-by: Philippe Mathieu-Daudé's avatarPhilippe Mathieu-Daudé +Message-id: 20210131103401.217160-1-f4bug@amsat.org +Reviewed-by: Peter Maydell's avatarPeter Maydell +Signed-off-by: Peter Maydell's avatarPeter Maydell + +Signed-off-by: Jiajie Li +--- + hw/intc/arm_gic.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c +index 77427a4188..492dabaa1c 100644 +--- a/hw/intc/arm_gic.c ++++ b/hw/intc/arm_gic.c +@@ -1454,7 +1454,7 @@ static void gic_dist_writel(void *opaque, hwaddr offset, + int target_cpu; + + cpu = gic_get_current_cpu(s); +- irq = value & 0x3ff; ++ irq = value & 0xf; + switch ((value >> 24) & 3) { + case 0: + mask = (value >> 16) & ALL_CPU_MASK; +-- +2.27.0 + diff --git a/hw-intc-gicv3-Add-CPU-hotplug-realize-hook.patch b/hw-intc-gicv3-Add-CPU-hotplug-realize-hook.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b77e0b0ce8a678b0c13b7f9f852522617b90c71 --- /dev/null +++ b/hw-intc-gicv3-Add-CPU-hotplug-realize-hook.patch @@ -0,0 +1,170 @@ +From 
6bbfb186c8d66b745aeb08143d3198fcedc52d6c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 6 Apr 2020 11:26:35 +0800 +Subject: [PATCH] hw/intc/gicv3: Add CPU hotplug realize hook + +GICv3 exposes individual CPU realization capability through +this hook. It will be used for hotplugged CPU. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/intc/arm_gicv3.c | 17 ++++++++++++++++- + hw/intc/arm_gicv3_common.c | 8 ++++++++ + hw/intc/arm_gicv3_kvm.c | 11 +++++++++++ + include/hw/intc/arm_gicv3.h | 2 ++ + include/hw/intc/arm_gicv3_common.h | 4 ++++ + 5 files changed, 41 insertions(+), 1 deletion(-) + +diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c +index 2fe79f794d..cacef26546 100644 +--- a/hw/intc/arm_gicv3.c ++++ b/hw/intc/arm_gicv3.c +@@ -361,6 +361,19 @@ static const MemoryRegionOps gic_ops[] = { + } + }; + ++static void gicv3_cpu_realize(GICv3State *s, int i) ++{ ++ gicv3_init_one_cpuif(s, i); ++} ++ ++static void arm_gicv3_cpu_hotplug_realize(GICv3State *s, int ncpu) ++{ ++ ARMGICv3Class *agc = ARM_GICV3_GET_CLASS(s); ++ ++ agc->parent_cpu_hotplug_realize(s, ncpu); ++ gicv3_cpu_realize(s, ncpu); ++} ++ + static void arm_gic_realize(DeviceState *dev, Error **errp) + { + /* Device instance realize function for the GIC sysbus device */ +@@ -388,7 +401,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) + } + + for (i = 0; i < s->num_cpu; i++) { +- gicv3_init_one_cpuif(s, i); ++ gicv3_cpu_realize(s, i); + } + } + +@@ -398,6 +411,8 @@ static void arm_gicv3_class_init(ObjectClass *klass, void *data) + ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass); + ARMGICv3Class *agc = ARM_GICV3_CLASS(klass); + ++ agc->parent_cpu_hotplug_realize = agcc->cpu_hotplug_realize; ++ agcc->cpu_hotplug_realize = arm_gicv3_cpu_hotplug_realize; + agcc->post_load = arm_gicv3_post_load; + device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize); + } +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index 
798f295d7c..8740a52c9f 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -313,6 +313,11 @@ static void arm_gicv3_common_cpu_realize(GICv3State *s, int ncpu) + gicv3_set_gicv3state(cpu, &s->cpu[ncpu]); + } + ++static void arm_gicv3_common_cpu_hotplug_realize(GICv3State *s, int ncpu) ++{ ++ arm_gicv3_common_cpu_realize(s, ncpu); ++} ++ + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + { + GICv3State *s = ARM_GICV3_COMMON(dev); +@@ -357,6 +362,7 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + + for (i = 0; i < s->num_cpu; i++) { + CPUState *cpu = qemu_get_cpu(i); ++ + uint64_t cpu_affid; + int last; + +@@ -508,12 +514,14 @@ static Property arm_gicv3_common_properties[] = { + static void arm_gicv3_common_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); ++ ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass); + ARMLinuxBootIfClass *albifc = ARM_LINUX_BOOT_IF_CLASS(klass); + + dc->reset = arm_gicv3_common_reset; + dc->realize = arm_gicv3_common_realize; + dc->props = arm_gicv3_common_properties; + dc->vmsd = &vmstate_gicv3; ++ agcc->cpu_hotplug_realize = arm_gicv3_common_cpu_hotplug_realize; + albifc->arm_linux_init = arm_gic_common_linux_init; + } + +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index b2936938cb..f8d7be5479 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -78,6 +78,7 @@ typedef struct KVMARMGICv3Class { + ARMGICv3CommonClass parent_class; + DeviceRealize parent_realize; + void (*parent_reset)(DeviceState *dev); ++ CPUHotplugRealize parent_cpu_hotplug_realize; + } KVMARMGICv3Class; + + static void kvm_arm_gicv3_set_irq(void *opaque, int irq, int level) +@@ -768,6 +769,14 @@ static void kvm_arm_gicv3_cpu_realize(GICv3State *s, int ncpu) + define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); + } + ++static void kvm_arm_gicv3_cpu_hotplug_realize(GICv3State *s, int ncpu) ++{ ++ KVMARMGICv3Class *kagcc = 
KVM_ARM_GICV3_GET_CLASS(s); ++ ++ kagcc->parent_cpu_hotplug_realize(s, ncpu); ++ kvm_arm_gicv3_cpu_realize(s, ncpu); ++} ++ + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + { + GICv3State *s = KVM_ARM_GICV3(dev); +@@ -884,6 +893,8 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) + ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass); + KVMARMGICv3Class *kgc = KVM_ARM_GICV3_CLASS(klass); + ++ kgc->parent_cpu_hotplug_realize = agcc->cpu_hotplug_realize; ++ agcc->cpu_hotplug_realize = kvm_arm_gicv3_cpu_hotplug_realize; + agcc->pre_save = kvm_arm_gicv3_get; + agcc->post_load = kvm_arm_gicv3_put; + device_class_set_parent_realize(dc, kvm_arm_gicv3_realize, +diff --git a/include/hw/intc/arm_gicv3.h b/include/hw/intc/arm_gicv3.h +index 4a6fd85e22..98f2bdb7e9 100644 +--- a/include/hw/intc/arm_gicv3.h ++++ b/include/hw/intc/arm_gicv3.h +@@ -26,6 +26,8 @@ typedef struct ARMGICv3Class { + ARMGICv3CommonClass parent_class; + /*< public >*/ + ++ CPUHotplugRealize parent_cpu_hotplug_realize; ++ + DeviceRealize parent_realize; + } ARMGICv3Class; + +diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h +index 31ec9a1ae4..45cc50ed3b 100644 +--- a/include/hw/intc/arm_gicv3_common.h ++++ b/include/hw/intc/arm_gicv3_common.h +@@ -286,11 +286,15 @@ GICV3_BITMAP_ACCESSORS(edge_trigger) + #define ARM_GICV3_COMMON_GET_CLASS(obj) \ + OBJECT_GET_CLASS(ARMGICv3CommonClass, (obj), TYPE_ARM_GICV3_COMMON) + ++typedef void (*CPUHotplugRealize)(GICv3State *s, int ncpu); ++ + typedef struct ARMGICv3CommonClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + ++ CPUHotplugRealize cpu_hotplug_realize; ++ + void (*pre_save)(GICv3State *s); + void (*post_load)(GICv3State *s); + } ARMGICv3CommonClass; +-- +2.19.1 diff --git a/hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch b/hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch new file mode 100644 index 
0000000000000000000000000000000000000000..3bffc1cf188e0ca6167e1ffb3cd138f14073dc8b --- /dev/null +++ b/hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch @@ -0,0 +1,46 @@ +From 2b157688d19da5ce4fca6b5f3c78d2e309ecec9a Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Wed, 11 Nov 2020 18:36:36 +0530 +Subject: [PATCH] hw/net/e1000e: advance desc_offset in case of null descriptor + +While receiving packets via e1000e_write_packet_to_guest() routine, +'desc_offset' is advanced only when an RX descriptor is processed. And +an RX descriptor is not processed if it has a NULL buffer address. +This may lead to an infinite loop condition. Increment 'desc_offset' +to process the next descriptor in the ring to avoid an infinite loop. + +Reported-by: Cheol-woo Myung <330cjfdn@gmail.com> +Signed-off-by: Prasad J Pandit +Signed-off-by: Jason Wang +(cherry-picked from c2cb5116) +Fix CVE-2020-28916 +Signed-off-by: Alex Chen +--- + hw/net/e1000e_core.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c +index 2a221c2ef9..e45d47f584 100644 +--- a/hw/net/e1000e_core.c ++++ b/hw/net/e1000e_core.c +@@ -1595,13 +1595,13 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, + (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); + } + } +- desc_offset += desc_size; +- if (desc_offset >= total_size) { +- is_last = true; +- } + } else { /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + } ++ desc_offset += desc_size; ++ if (desc_offset >= total_size) { ++ is_last = true; ++ } + + e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL, + rss_info, do_ps ?
ps_hdr_len : 0, &bastate.written); +-- +2.27.0 + diff --git a/hw-net-fix-vmxnet3-live-migration.patch b/hw-net-fix-vmxnet3-live-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..be97b3ac0dc76d839f646078151cddc0861ab094 --- /dev/null +++ b/hw-net-fix-vmxnet3-live-migration.patch @@ -0,0 +1,136 @@ +From b8b9f58ee5d3cff0a1e7cca770fe632043efb728 Mon Sep 17 00:00:00 2001 +From: Marcel Apfelbaum +Date: Fri, 5 Jul 2019 04:07:11 +0300 +Subject: [PATCH] hw/net: fix vmxnet3 live migration + +At some point vmxnet3 live migration stopped working and git-bisect +didn't help finding a working version. +The issue is the PCI configuration space is not being migrated +successfully and MSIX remains masked at destination. + +Remove the migration differentiation between PCI and PCIe since +the logic resides now inside VMSTATE_PCI_DEVICE. +Remove also the VMXNET3_COMPAT_FLAG_DISABLE_PCIE based differentiation +since at 'realize' time is decided if the device is PCI or PCIe, +then the above macro is enough. + +Use the opportunity to move to the standard VMSTATE_MSIX +instead of the deprecated SaveVMHandlers. + +Signed-off-by: Marcel Apfelbaum +Message-Id: <20190705010711.23277-1-marcel.apfelbaum@gmail.com> +Tested-by: Sukrit Bhatnagar +Reviewed-by: Dmitry Fleytman +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/net/vmxnet3.c | 52 ++---------------------------------------------- + 1 file changed, 2 insertions(+), 50 deletions(-) + +diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c +index ecc4f5bcf0..bf8e6ca4c9 100644 +--- a/hw/net/vmxnet3.c ++++ b/hw/net/vmxnet3.c +@@ -2153,21 +2153,6 @@ vmxnet3_cleanup_msi(VMXNET3State *s) + msi_uninit(d); + } + +-static void +-vmxnet3_msix_save(QEMUFile *f, void *opaque) +-{ +- PCIDevice *d = PCI_DEVICE(opaque); +- msix_save(d, f); +-} +- +-static int +-vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id) +-{ +- PCIDevice *d = PCI_DEVICE(opaque); +- msix_load(d, f); +- return 0; +-} +- + static const MemoryRegionOps b0_ops = { + .read = vmxnet3_io_bar0_read, + .write = vmxnet3_io_bar0_write, +@@ -2188,11 +2173,6 @@ static const MemoryRegionOps b1_ops = { + }, + }; + +-static SaveVMHandlers savevm_vmxnet3_msix = { +- .save_state = vmxnet3_msix_save, +- .load_state = vmxnet3_msix_load, +-}; +- + static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) + { + uint64_t dsn_payload; +@@ -2215,7 +2195,6 @@ static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) + + static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) + { +- DeviceState *dev = DEVICE(pci_dev); + VMXNET3State *s = VMXNET3(pci_dev); + int ret; + +@@ -2261,8 +2240,6 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) + pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET, + vmxnet3_device_serial_num(s)); + } +- +- register_savevm_live(dev, "vmxnet3-msix", -1, 1, &savevm_vmxnet3_msix, s); + } + + static void vmxnet3_instance_init(Object *obj) +@@ -2452,29 +2429,6 @@ static const VMStateDescription vmstate_vmxnet3_int_state = { + } + }; + +-static bool vmxnet3_vmstate_need_pcie_device(void *opaque) +-{ +- VMXNET3State *s = VMXNET3(opaque); +- +- return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE); +-} +- +-static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id) +-{ +- return 
!vmxnet3_vmstate_need_pcie_device(opaque); +-} +- +-static const VMStateDescription vmstate_vmxnet3_pcie_device = { +- .name = "vmxnet3/pcie", +- .version_id = 1, +- .minimum_version_id = 1, +- .needed = vmxnet3_vmstate_need_pcie_device, +- .fields = (VMStateField[]) { +- VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), +- VMSTATE_END_OF_LIST() +- } +-}; +- + static const VMStateDescription vmstate_vmxnet3 = { + .name = "vmxnet3", + .version_id = 1, +@@ -2482,9 +2436,8 @@ static const VMStateDescription vmstate_vmxnet3 = { + .pre_save = vmxnet3_pre_save, + .post_load = vmxnet3_post_load, + .fields = (VMStateField[]) { +- VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State, +- vmxnet3_vmstate_test_pci_device, 0, +- vmstate_pci_device, PCIDevice), ++ VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), ++ VMSTATE_MSIX(parent_obj, VMXNET3State), + VMSTATE_BOOL(rx_packets_compound, VMXNET3State), + VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State), + VMSTATE_BOOL(lro_supported, VMXNET3State), +@@ -2520,7 +2473,6 @@ static const VMStateDescription vmstate_vmxnet3 = { + }, + .subsections = (const VMStateDescription*[]) { + &vmxstate_vmxnet3_mcast_list, +- &vmstate_vmxnet3_pcie_device, + NULL + } + }; +-- +2.27.0 + diff --git a/hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch b/hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch new file mode 100644 index 0000000000000000000000000000000000000000..a763f93bc4859f6f38fa8f6d83da6d84f1ab01f2 --- /dev/null +++ b/hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch @@ -0,0 +1,40 @@ +From 596e7e8908b742f727d02ec9ab747116573f67e0 Mon Sep 17 00:00:00 2001 +From: Mauro Matteo Cascella +Date: Sat, 1 Aug 2020 18:42:38 +0200 +Subject: [PATCH] hw/net/net_tx_pkt: fix assertion failure in + net_tx_pkt_add_raw_fragment() + +An assertion failure issue was found in the code that processes network packets +while adding data fragments into the packet context. It could be abused by a +malicious guest to abort the QEMU process on the host. 
This patch replaces the +affected assert() with a conditional statement, returning false if the current +data fragment exceeds max_raw_frags. + +Reported-by: Alexander Bulekov +Reported-by: Ziming Zhang +Reviewed-by: Dmitry Fleytman +Signed-off-by: Mauro Matteo Cascella +Signed-off-by: Jason Wang +--- + hw/net/net_tx_pkt.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c +index 162f802dd7..54d4c3bbd0 100644 +--- a/hw/net/net_tx_pkt.c ++++ b/hw/net/net_tx_pkt.c +@@ -379,7 +379,10 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, + hwaddr mapped_len = 0; + struct iovec *ventry; + assert(pkt); +- assert(pkt->max_raw_frags > pkt->raw_frags); ++ ++ if (pkt->raw_frags >= pkt->max_raw_frags) { ++ return false; ++ } + + if (!len) { + return true; +-- +2.23.0 + diff --git a/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch b/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch new file mode 100644 index 0000000000000000000000000000000000000000..dd3a972936049057760b5819669beec96cbeb48c --- /dev/null +++ b/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch @@ -0,0 +1,39 @@ +From e921d308845a0249126c59655d985007acf58ed7 Mon Sep 17 00:00:00 2001 +From: Qiang Ning +Date: Mon, 12 Jul 2021 17:30:45 +0800 +Subject: [PATCH] hw/net/rocker_of_dpa: fix double free bug of rocker device + +The of_dpa_cmd_add_l2_flood function of the rocker device +releases the memory of group->l2_flood.group_ids before +applying for new memory. If the l2_group configured by +the guest does not match the input group->l2_flood.group_ids, +the err_out branch is redirected to release the memory of the +group->l2_flood.group_ids branch. The pointer is not set to +NULL after the memory is freed. When the guest accesses the +of_dpa_cmd_add_l2_flood function again, the memory of +group->l2_flood.group_ids is released again. As a result, +the memory is double free. 
+ +Fix that by setting group->l2_flood.group_ids to NULL after free. + +Signed-off-by: Jiajie Li +Signed-off-by: Qiang Ning +--- + hw/net/rocker/rocker_of_dpa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c +index 8e347d1ee4..0c9de5f014 100644 +--- a/hw/net/rocker/rocker_of_dpa.c ++++ b/hw/net/rocker/rocker_of_dpa.c +@@ -2070,6 +2070,7 @@ static int of_dpa_cmd_add_l2_flood(OfDpa *of_dpa, OfDpaGroup *group, + err_out: + group->l2_flood.group_count = 0; + g_free(group->l2_flood.group_ids); ++ group->l2_flood.group_ids = NULL; + g_free(tlvs); + + return err; +-- +2.27.0 + diff --git a/hw-net-xgmac-Fix-buffer-overflow-in-xgmac_enet_send.patch b/hw-net-xgmac-Fix-buffer-overflow-in-xgmac_enet_send.patch new file mode 100644 index 0000000000000000000000000000000000000000..62be98b6f147c098f57efbad27cbc6a5831d5ea2 --- /dev/null +++ b/hw-net-xgmac-Fix-buffer-overflow-in-xgmac_enet_send.patch @@ -0,0 +1,58 @@ +From 2d18434c1ca66d68f80954be6828a3770176dab4 Mon Sep 17 00:00:00 2001 +From: Mauro Matteo Cascella +Date: Fri, 10 Jul 2020 11:19:41 +0200 +Subject: [PATCH] hw/net/xgmac: Fix buffer overflow in xgmac_enet_send() + +A buffer overflow issue was reported by Mr. Ziming Zhang, CC'd here. It +occurs while sending an Ethernet frame due to missing break statements +and improper checking of the buffer size. 
+ +Reported-by: Ziming Zhang +Signed-off-by: Mauro Matteo Cascella +Reviewed-by: Peter Maydell +Signed-off-by: Jason Wang +--- + hw/net/xgmac.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c +index f49df95b07..f496f7ed4c 100644 +--- a/hw/net/xgmac.c ++++ b/hw/net/xgmac.c +@@ -217,21 +217,31 @@ static void xgmac_enet_send(XgmacState *s) + } + len = (bd.buffer1_size & 0xfff) + (bd.buffer2_size & 0xfff); + ++ /* ++ * FIXME: these cases of malformed tx descriptors (bad sizes) ++ * should probably be reported back to the guest somehow ++ * rather than simply silently stopping processing, but we ++ * don't know what the hardware does in this situation. ++ * This will only happen for buggy guests anyway. ++ */ + if ((bd.buffer1_size & 0xfff) > 2048) { + DEBUGF_BRK("qemu:%s:ERROR...ERROR...ERROR... -- " + "xgmac buffer 1 len on send > 2048 (0x%x)\n", + __func__, bd.buffer1_size & 0xfff); ++ break; + } + if ((bd.buffer2_size & 0xfff) != 0) { + DEBUGF_BRK("qemu:%s:ERROR...ERROR...ERROR... 
-- " + "xgmac buffer 2 len on send != 0 (0x%x)\n", + __func__, bd.buffer2_size & 0xfff); ++ break; + } +- if (len >= sizeof(frame)) { ++ if (frame_size + len >= sizeof(frame)) { + DEBUGF_BRK("qemu:%s: buffer overflow %d read into %zu " +- "buffer\n" , __func__, len, sizeof(frame)); ++ "buffer\n" , __func__, frame_size + len, sizeof(frame)); + DEBUGF_BRK("qemu:%s: buffer1.size=%d; buffer2.size=%d\n", + __func__, bd.buffer1_size, bd.buffer2_size); ++ break; + } + + cpu_physical_memory_read(bd.buffer1_addr, ptr, len); +-- +2.23.0 + diff --git a/hw-pci-host-add-pci-intack-write-method.patch b/hw-pci-host-add-pci-intack-write-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..bb09d022bb7b23fae8cc34e7c7feae65c6e5bc3e --- /dev/null +++ b/hw-pci-host-add-pci-intack-write-method.patch @@ -0,0 +1,50 @@ +From 80214941ed6ce24983d8f161a7c9532678acc6f1 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Mar 2021 17:03:57 +0800 +Subject: [PATCH] hw/pci-host: add pci-intack write method + +fix CVE-2020-15469 + +Add pci-intack mmio write method to avoid NULL pointer dereference +issue. 
+ +Reported-by: Lei Sun +Reviewed-by: Li Qiang +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/pci-host/prep.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c +index c564f234af..f03c81f651 100644 +--- a/hw/pci-host/prep.c ++++ b/hw/pci-host/prep.c +@@ -26,6 +26,7 @@ + #include "qemu/osdep.h" + #include "qemu-common.h" + #include "qemu/units.h" ++#include "qemu/log.h" + #include "qapi/error.h" + #include "hw/hw.h" + #include "hw/pci/pci.h" +@@ -117,8 +118,15 @@ static uint64_t raven_intack_read(void *opaque, hwaddr addr, + return pic_read_irq(isa_pic); + } + ++static void raven_intack_write(void *opaque, hwaddr addr, ++ uint64_t data, unsigned size) ++{ ++ qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); ++} ++ + static const MemoryRegionOps raven_intack_ops = { + .read = raven_intack_read, ++ .write = raven_intack_write, + .valid = { + .max_access_size = 1, + }, +-- +2.27.0 + diff --git a/hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch b/hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch new file mode 100644 index 0000000000000000000000000000000000000000..76497d9ef4f4e111baba53cdd84ac7b7dbecb112 --- /dev/null +++ b/hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch @@ -0,0 +1,67 @@ +From 595a0d0a0f21cd73863ea3b78ecccb6e0ea8b7a8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Mon, 1 Jun 2020 16:29:25 +0200 +Subject: [PATCH 2/5] hw/pci/pci_bridge: Correct pci_bridge_io memory region + size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +memory_region_set_size() handle the 16 Exabytes limit by +special-casing the UINT64_MAX value. This is not a problem +for the 32-bit maximum, 4 GiB. 
+By using the UINT32_MAX value, the pci_bridge_io MemoryRegion +ends up missing 1 byte: + + (qemu) info mtree + memory-region: pci_bridge_io + 0000000000000000-00000000fffffffe (prio 0, i/o): pci_bridge_io + 0000000000000060-0000000000000060 (prio 0, i/o): i8042-data + 0000000000000064-0000000000000064 (prio 0, i/o): i8042-cmd + 00000000000001ce-00000000000001d1 (prio 0, i/o): vbe + 0000000000000378-000000000000037f (prio 0, i/o): parallel + 00000000000003b4-00000000000003b5 (prio 0, i/o): vga + ... + +Fix by using the correct value. We now have: + + memory-region: pci_bridge_io + 0000000000000000-00000000ffffffff (prio 0, i/o): pci_bridge_io + 0000000000000060-0000000000000060 (prio 0, i/o): i8042-data + 0000000000000064-0000000000000064 (prio 0, i/o): i8042-cmd + ... + +Reviewed-by: Peter Maydell +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20200601142930.29408-4-f4bug@amsat.org> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Richard Henderson +--- + hw/pci/pci_bridge.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c +index 715b9a4f..d67c691d 100644 +--- a/hw/pci/pci_bridge.c ++++ b/hw/pci/pci_bridge.c +@@ -30,6 +30,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/units.h" + #include "hw/pci/pci_bridge.h" + #include "hw/pci/pci_bus.h" + #include "qemu/module.h" +@@ -381,7 +382,7 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename) + memory_region_init(&br->address_space_mem, OBJECT(br), "pci_bridge_pci", UINT64_MAX); + sec_bus->address_space_io = &br->address_space_io; + memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io", +- UINT32_MAX); ++ 4 * GiB); + br->windows = pci_bridge_region_init(br); + QLIST_INIT(&sec_bus->child); + QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); +-- +2.23.0 + diff --git a/hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch 
b/hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch new file mode 100644 index 0000000000000000000000000000000000000000..e2f772c6ac1ac3c7b5cdcbf5e2ac033903a723e3 --- /dev/null +++ b/hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch @@ -0,0 +1,68 @@ +From 86f70ed090478cc3b569b3606eb2723a0baadb52 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Tue, 16 Jun 2020 12:25:36 -0400 +Subject: [PATCH] hw/pci/pcie: Move hot plug capability check to pre_plug + callback + +RH-Author: Julia Suvorova +Message-id: <20200616122536.1027685-1-jusual@redhat.com> +Patchwork-id: 97548 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/1] hw/pci/pcie: Move hot plug capability check to pre_plug callback +Bugzilla: 1820531 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Auger Eric +RH-Acked-by: Sergio Lopez Pascual + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1820531 +BRANCH: rhel-av-8.2.1 +UPSTREAM: merged +BREW: 29422092 + +Check for hot plug capability earlier to avoid removing devices attached +during the initialization process. + +Run qemu with an unattached drive: + -drive file=$FILE,if=none,id=drive0 \ + -device pcie-root-port,id=rp0,slot=3,bus=pcie.0,hotplug=off +Hotplug a block device: + device_add virtio-blk-pci,id=blk0,drive=drive0,bus=rp0 +If hotplug fails on plug_cb, drive0 will be deleted. + +Fixes: 0501e1aa1d32a6 ("hw/pci/pcie: Forbid hot-plug if it's disabled on the slot") + +Acked-by: Igor Mammedov +Signed-off-by: Julia Suvorova +Message-Id: <20200604125947.881210-1-jusual@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 0dabc0f6544f2c0310546f6d6cf3b68979580a9c) +Signed-off-by: Eduardo Lima (Etrunko) +--- + hw/pci/pcie.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index 2b4eedd2bb..b5190a3a55 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -419,6 +419,17 @@ static void pcie_cap_slot_plug_common(PCIDevice *hotplug_dev, DeviceState *dev, + void pcie_cap_slot_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { ++ PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); ++ uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; ++ uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); ++ ++ /* Check if hot-plug is disabled on the slot */ ++ if (dev->hotplugged && (sltcap & PCI_EXP_SLTCAP_HPC) == 0) { ++ error_setg(errp, "Hot-plug failed: unsupported by the port device '%s'", ++ DEVICE(hotplug_pdev)->id); ++ return; ++ } ++ + pcie_cap_slot_plug_common(PCI_DEVICE(hotplug_dev), dev, errp); + } + +-- +2.27.0 + diff --git a/hw-ppc-Kconfig-Enable-TPM_SPAPR-as-part-of-PSERIES-c.patch b/hw-ppc-Kconfig-Enable-TPM_SPAPR-as-part-of-PSERIES-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..ad3fc3a8356d20d28805db548c12b2b3745e8054 --- /dev/null +++ b/hw-ppc-Kconfig-Enable-TPM_SPAPR-as-part-of-PSERIES-c.patch @@ -0,0 +1,36 @@ +From 95cbe18c649a20f98562a993537a67e0ad78bf36 Mon Sep 17 00:00:00 2001 +From: Stefan Berger +Date: Tue, 21 Jan 2020 10:29:34 -0500 +Subject: [PATCH 08/19] hw/ppc/Kconfig: Enable TPM_SPAPR as part of PSERIES + config +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Stefan Berger +Reviewed-by: Marc-André Lureau +Reviewed-by: David Gibson +Message-Id: <20200121152935.649898-6-stefanb@linux.ibm.com> +[dwg: Use default in Kconfig rather than select to avoid breaking + Windows host build] +Signed-off-by: David Gibson +Signed-off-by: jiangfangjie +--- + 
hw/tpm/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig +index 4d4ab085..9e67d990 100644 +--- a/hw/tpm/Kconfig ++++ b/hw/tpm/Kconfig +@@ -25,6 +25,6 @@ config TPM_EMULATOR + + config TPM_SPAPR + bool +- default n ++ default y + depends on TPM && PSERIES + select TPMDEV +-- +2.23.0 + diff --git a/hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch b/hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch new file mode 100644 index 0000000000000000000000000000000000000000..12c907453efdaa1141217b3adccf27d4099ee924 --- /dev/null +++ b/hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch @@ -0,0 +1,132 @@ +From 5ec15fabe78e385a81e44c7944cd05309de7f36e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Jun 2020 09:26:29 +0200 +Subject: [PATCH 7/9] hw/scsi/megasas: Fix possible out-of-bounds array access + in tracepoints +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some tracepoints in megasas.c use a guest-controlled value as an index +into the mfi_frame_desc[] array. Thus a malicious guest could cause an +out-of-bounds error here. Fortunately, the impact is very low since this +can only happen when the corresponding tracepoints have been enabled +before, but the problem should be fixed anyway with a proper check. 
+ +Buglink: https://bugs.launchpad.net/qemu/+bug/1882065 +Signed-off-by: Thomas Huth +Message-Id: <20200615072629.32321-1-thuth@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +--- + hw/scsi/megasas.c | 36 +++++++++++++++++++++++------------- + 1 file changed, 23 insertions(+), 13 deletions(-) + +diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c +index 94469e8169..9421f4d14e 100644 +--- a/hw/scsi/megasas.c ++++ b/hw/scsi/megasas.c +@@ -53,10 +53,6 @@ + #define MEGASAS_FLAG_USE_QUEUE64 1 + #define MEGASAS_MASK_USE_QUEUE64 (1 << MEGASAS_FLAG_USE_QUEUE64) + +-static const char *mfi_frame_desc[] = { +- "MFI init", "LD Read", "LD Write", "LD SCSI", "PD SCSI", +- "MFI Doorbell", "MFI Abort", "MFI SMP", "MFI Stop"}; +- + typedef struct MegasasCmd { + uint32_t index; + uint16_t flags; +@@ -182,6 +178,20 @@ static void megasas_frame_set_scsi_status(MegasasState *s, + stb_pci_dma(pci, frame + offsetof(struct mfi_frame_header, scsi_status), v); + } + ++static inline const char *mfi_frame_desc(unsigned int cmd) ++{ ++ static const char *mfi_frame_descs[] = { ++ "MFI init", "LD Read", "LD Write", "LD SCSI", "PD SCSI", ++ "MFI Doorbell", "MFI Abort", "MFI SMP", "MFI Stop" ++ }; ++ ++ if (cmd < ARRAY_SIZE(mfi_frame_descs)) { ++ return mfi_frame_descs[cmd]; ++ } ++ ++ return "Unknown"; ++} ++ + /* + * Context is considered opaque, but the HBA firmware is running + * in little endian mode. So convert it to little endian, too. 
+@@ -1669,25 +1679,25 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, + if (is_logical) { + if (target_id >= MFI_MAX_LD || lun_id != 0) { + trace_megasas_scsi_target_not_present( +- mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id); ++ mfi_frame_desc(frame_cmd), is_logical, target_id, lun_id); + return MFI_STAT_DEVICE_NOT_FOUND; + } + } + sdev = scsi_device_find(&s->bus, 0, target_id, lun_id); + + cmd->iov_size = le32_to_cpu(cmd->frame->header.data_len); +- trace_megasas_handle_scsi(mfi_frame_desc[frame_cmd], is_logical, ++ trace_megasas_handle_scsi(mfi_frame_desc(frame_cmd), is_logical, + target_id, lun_id, sdev, cmd->iov_size); + + if (!sdev || (megasas_is_jbod(s) && is_logical)) { + trace_megasas_scsi_target_not_present( +- mfi_frame_desc[frame_cmd], is_logical, target_id, lun_id); ++ mfi_frame_desc(frame_cmd), is_logical, target_id, lun_id); + return MFI_STAT_DEVICE_NOT_FOUND; + } + + if (cdb_len > 16) { + trace_megasas_scsi_invalid_cdb_len( +- mfi_frame_desc[frame_cmd], is_logical, ++ mfi_frame_desc(frame_cmd), is_logical, + target_id, lun_id, cdb_len); + megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); + cmd->frame->header.scsi_status = CHECK_CONDITION; +@@ -1705,7 +1715,7 @@ static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd, + cmd->req = scsi_req_new(sdev, cmd->index, lun_id, cdb, cmd); + if (!cmd->req) { + trace_megasas_scsi_req_alloc_failed( +- mfi_frame_desc[frame_cmd], target_id, lun_id); ++ mfi_frame_desc(frame_cmd), target_id, lun_id); + megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); + cmd->frame->header.scsi_status = BUSY; + s->event_count++; +@@ -1750,17 +1760,17 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) + } + + trace_megasas_handle_io(cmd->index, +- mfi_frame_desc[frame_cmd], target_id, lun_id, ++ mfi_frame_desc(frame_cmd), target_id, lun_id, + (unsigned long)lba_start, (unsigned long)lba_count); + if (!sdev) { + trace_megasas_io_target_not_present(cmd->index, +- 
mfi_frame_desc[frame_cmd], target_id, lun_id); ++ mfi_frame_desc(frame_cmd), target_id, lun_id); + return MFI_STAT_DEVICE_NOT_FOUND; + } + + if (cdb_len > 16) { + trace_megasas_scsi_invalid_cdb_len( +- mfi_frame_desc[frame_cmd], 1, target_id, lun_id, cdb_len); ++ mfi_frame_desc(frame_cmd), 1, target_id, lun_id, cdb_len); + megasas_write_sense(cmd, SENSE_CODE(INVALID_OPCODE)); + cmd->frame->header.scsi_status = CHECK_CONDITION; + s->event_count++; +@@ -1780,7 +1790,7 @@ static int megasas_handle_io(MegasasState *s, MegasasCmd *cmd, int frame_cmd) + lun_id, cdb, cmd); + if (!cmd->req) { + trace_megasas_scsi_req_alloc_failed( +- mfi_frame_desc[frame_cmd], target_id, lun_id); ++ mfi_frame_desc(frame_cmd), target_id, lun_id); + megasas_write_sense(cmd, SENSE_CODE(NO_SENSE)); + cmd->frame->header.scsi_status = BUSY; + s->event_count++; +-- +2.25.1 + diff --git a/hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch b/hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch new file mode 100644 index 0000000000000000000000000000000000000000..42df9650a9e378fbb0d96afbd5b8a844c8ed64c7 --- /dev/null +++ b/hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch @@ -0,0 +1,25 @@ +From 8b8d3992db22a583b69b6e2ae1d9cd87e2179e21 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Fri, 18 Sep 2020 10:55:22 +0800 +Subject: [PATCH] hw/sd/sdhci: Fix DMA Transfer Block Size field The 'Transfer + Block Size' field is 12-bit wide. See section '2.2.2 Block Size Register + (Offset 004h)' in datasheet. 
+ +Buglink: https://bugs.launchpad.net/qemu/+bug/1892960 + +diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c +index 7b80b1d9..acf482b8 100644 +--- a/hw/sd/sdhci.c ++++ b/hw/sd/sdhci.c +@@ -1127,7 +1127,7 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) + break; + case SDHC_BLKSIZE: + if (!TRANSFERRING_DATA(s->prnsts)) { +- MASKED_WRITE(s->blksize, mask, value); ++ MASKED_WRITE(s->blksize, mask, extract32(value, 0, 12)); + MASKED_WRITE(s->blkcnt, mask >> 16, value >> 16); + } + +-- +2.23.0 + diff --git a/hw-tpm-rename-Error-parameter-to-more-common-errp.patch b/hw-tpm-rename-Error-parameter-to-more-common-errp.patch new file mode 100644 index 0000000000000000000000000000000000000000..a47a1ae68da792d7811b9d85cb5cbd5f5d5ac0cd --- /dev/null +++ b/hw-tpm-rename-Error-parameter-to-more-common-errp.patch @@ -0,0 +1,58 @@ +From f2dceb3cde537210896a2cadb8958cfd310113a3 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Thu, 5 Dec 2019 20:46:30 +0300 +Subject: [PATCH 01/19] hw/tpm: rename Error ** parameter to more common errp +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Stefan Berger +Message-Id: <20191205174635.18758-17-vsementsov@virtuozzo.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Markus Armbruster +Signed-off-by: jiangfangjie +--- + hw/tpm/tpm_emulator.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c +index fc0b512f..38bf5fd6 100644 +--- a/hw/tpm/tpm_emulator.c ++++ b/hw/tpm/tpm_emulator.c +@@ -155,7 +155,7 @@ static int tpm_emulator_unix_tx_bufs(TPMEmulator *tpm_emu, + const uint8_t *in, uint32_t in_len, + uint8_t *out, uint32_t out_len, + bool *selftest_done, +- Error **err) ++ Error **errp) + { + ssize_t ret; + bool is_selftest = false; +@@ -165,20 +165,20 @@ static int tpm_emulator_unix_tx_bufs(TPMEmulator *tpm_emu, + is_selftest = 
tpm_util_is_selftest(in, in_len); + } + +- ret = qio_channel_write_all(tpm_emu->data_ioc, (char *)in, in_len, err); ++ ret = qio_channel_write_all(tpm_emu->data_ioc, (char *)in, in_len, errp); + if (ret != 0) { + return -1; + } + + ret = qio_channel_read_all(tpm_emu->data_ioc, (char *)out, +- sizeof(struct tpm_resp_hdr), err); ++ sizeof(struct tpm_resp_hdr), errp); + if (ret != 0) { + return -1; + } + + ret = qio_channel_read_all(tpm_emu->data_ioc, + (char *)out + sizeof(struct tpm_resp_hdr), +- tpm_cmd_get_size(out) - sizeof(struct tpm_resp_hdr), err); ++ tpm_cmd_get_size(out) - sizeof(struct tpm_resp_hdr), errp); + if (ret != 0) { + return -1; + } +-- +2.23.0 + diff --git a/hw-usb-core-fix-buffer-overflow.patch b/hw-usb-core-fix-buffer-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..494955788a2506fd2d28521ff234118025fbe674 --- /dev/null +++ b/hw-usb-core-fix-buffer-overflow.patch @@ -0,0 +1,46 @@ +hw-usb-core-fix-buffer-overflow + +From 18ad0451f113ffc3a2ff59c059d189cca1e42842 Mon Sep 17 00:00:00 2001 +From: root +Date: Wed, 19 Aug 2020 17:04:04 +0800 +Subject: [PATCH] hw/usb/core.c fix buffer overflow + +Store calculated setup_len in a local variable, verify it, + and only write it to the struct (USBDevice->setup_len) in case it passed the + sanity checks. + +This prevents other code (do_token_{in,out} function specifically) +from working with invalid USBDevice->setup_len values and overrunning +the USBDevice->setup_buf[] buffer. 
+ +Fixes: CVE-2020-14364 +Signed-off-by: Gerd Hoffmann +--- + hw/usb/core.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 5abd128b..12342f13 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -144,6 +144,8 @@ static void do_token_setup(USBDevice *s, USBPacket *p) + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", + s->setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; ++ s->setup_len = 0; ++ s->setup_state = SETUP_STATE_ACK; + return; + } + +@@ -277,6 +279,8 @@ static void do_parameter(USBDevice *s, USBPacket *p) + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", + s->setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; ++ s->setup_len = 0; ++ s->setup_state = SETUP_STATE_ACK; + return; + } + +-- +2.23.0 + diff --git a/hw-usb-hcd-ohci-check-for-processed-TD-before-retire.patch b/hw-usb-hcd-ohci-check-for-processed-TD-before-retire.patch new file mode 100644 index 0000000000000000000000000000000000000000..96a45b8100318237976abc51ac2b584b569e018a --- /dev/null +++ b/hw-usb-hcd-ohci-check-for-processed-TD-before-retire.patch @@ -0,0 +1,40 @@ +From b1398dc6f3eb16e006167bdd8666fb7c52918e13 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Tue, 15 Sep 2020 23:52:59 +0530 +Subject: [PATCH] hw: usb: hcd-ohci: check for processed TD before retire + +While servicing OHCI transfer descriptors(TD), ohci_service_iso_td +retires a TD if it has passed its time frame. It does not check if +the TD was already processed once and holds an error code in TD_CC. +It may happen if the TD list has a loop. Add check to avoid an +infinite loop condition. 
+ +Signed-off-by: Prasad J Pandit +Reviewed-by: Li Qiang +Message-id: 20200915182259.68522-3-ppandit@redhat.com +Signed-off-by: Gerd Hoffmann +(cherry-picked from 1be90ebe) +Fix CVE-2020-25625 +Signed-off-by: Alex Chen +--- + hw/usb/hcd-ohci.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c +index 4f6fdbc0a7..ffe52a09d7 100644 +--- a/hw/usb/hcd-ohci.c ++++ b/hw/usb/hcd-ohci.c +@@ -689,6 +689,10 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, + the next ISO TD of the same ED */ + trace_usb_ohci_iso_td_relative_frame_number_big(relative_frame_number, + frame_count); ++ if (OHCI_CC_DATAOVERRUN == OHCI_BM(iso_td.flags, TD_CC)) { ++ /* avoid infinite loop */ ++ return 1; ++ } + OHCI_SET_BM(iso_td.flags, TD_CC, OHCI_CC_DATAOVERRUN); + ed->head &= ~OHCI_DPTR_MASK; + ed->head |= (iso_td.next & OHCI_DPTR_MASK); +-- +2.27.0 + diff --git a/hw-usb-hcd-ohci-check-len-and-frame_number-variables.patch b/hw-usb-hcd-ohci-check-len-and-frame_number-variables.patch new file mode 100644 index 0000000000000000000000000000000000000000..0133d70db8abfb7338a57f5cf305c68ac8811e56 --- /dev/null +++ b/hw-usb-hcd-ohci-check-len-and-frame_number-variables.patch @@ -0,0 +1,99 @@ +From 789723b95045b6e44d1d1aef56a8bcb255a10476 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Tue, 15 Sep 2020 23:52:58 +0530 +Subject: [PATCH] hw: usb: hcd-ohci: check len and frame_number variables + +While servicing the OHCI transfer descriptors(TD), OHCI host +controller derives variables 'start_addr', 'end_addr', 'len' +etc. from values supplied by the host controller driver. +Host controller driver may supply values such that using +above variables leads to out-of-bounds access issues. +Add checks to avoid them. 
+ +AddressSanitizer: stack-buffer-overflow on address 0x7ffd53af76a0 + READ of size 2 at 0x7ffd53af76a0 thread T0 + #0 ohci_service_iso_td ../hw/usb/hcd-ohci.c:734 + #1 ohci_service_ed_list ../hw/usb/hcd-ohci.c:1180 + #2 ohci_process_lists ../hw/usb/hcd-ohci.c:1214 + #3 ohci_frame_boundary ../hw/usb/hcd-ohci.c:1257 + #4 timerlist_run_timers ../util/qemu-timer.c:572 + #5 qemu_clock_run_timers ../util/qemu-timer.c:586 + #6 qemu_clock_run_all_timers ../util/qemu-timer.c:672 + #7 main_loop_wait ../util/main-loop.c:527 + #8 qemu_main_loop ../softmmu/vl.c:1676 + #9 main ../softmmu/main.c:50 + +Reported-by: Gaoning Pan +Reported-by: Yongkang Jia +Reported-by: Yi Ren +Signed-off-by: Prasad J Pandit +Message-id: 20200915182259.68522-2-ppandit@redhat.com +Signed-off-by: Gerd Hoffmann +(cherry-picked from 1328fe0c) +Fix CVE-2020-25624 +Signed-off-by: Alex Chen +--- + hw/usb/hcd-ohci.c | 24 ++++++++++++++++++++++-- + 1 file changed, 22 insertions(+), 2 deletions(-) + +diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c +index ffe52a09d7..d2dd8efd58 100644 +--- a/hw/usb/hcd-ohci.c ++++ b/hw/usb/hcd-ohci.c +@@ -733,7 +733,11 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, + } + + start_offset = iso_td.offset[relative_frame_number]; +- next_offset = iso_td.offset[relative_frame_number + 1]; ++ if (relative_frame_number < frame_count) { ++ next_offset = iso_td.offset[relative_frame_number + 1]; ++ } else { ++ next_offset = iso_td.be; ++ } + + if (!(OHCI_BM(start_offset, TD_PSW_CC) & 0xe) || + ((relative_frame_number < frame_count) && +@@ -766,7 +770,12 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, + } + } else { + /* Last packet in the ISO TD */ +- end_addr = iso_td.be; ++ end_addr = next_offset; ++ } ++ ++ if (start_addr > end_addr) { ++ trace_usb_ohci_iso_td_bad_cc_overrun(start_addr, end_addr); ++ return 1; + } + + if ((start_addr & OHCI_PAGE_MASK) != (end_addr & OHCI_PAGE_MASK)) { +@@ -775,6 +784,9 @@ static int 
ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed, + } else { + len = end_addr - start_addr + 1; + } ++ if (len > sizeof(ohci->usb_buf)) { ++ len = sizeof(ohci->usb_buf); ++ } + + if (len && dir != OHCI_TD_DIR_IN) { + if (ohci_copy_iso_td(ohci, start_addr, end_addr, ohci->usb_buf, len, +@@ -977,8 +989,16 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed) + if ((td.cbp & 0xfffff000) != (td.be & 0xfffff000)) { + len = (td.be & 0xfff) + 0x1001 - (td.cbp & 0xfff); + } else { ++ if (td.cbp > td.be) { ++ trace_usb_ohci_iso_td_bad_cc_overrun(td.cbp, td.be); ++ ohci_die(ohci); ++ return 1; ++ } + len = (td.be - td.cbp) + 1; + } ++ if (len > sizeof(ohci->usb_buf)) { ++ len = sizeof(ohci->usb_buf); ++ } + + pktlen = len; + if (len && dir != OHCI_TD_DIR_IN) { +-- +2.27.0 + diff --git a/hw-vfio-common-trace-vfio_connect_container-operatio.patch b/hw-vfio-common-trace-vfio_connect_container-operatio.patch new file mode 100644 index 0000000000000000000000000000000000000000..bd952359250359770ea8d51711e88be943ee2c72 --- /dev/null +++ b/hw-vfio-common-trace-vfio_connect_container-operatio.patch @@ -0,0 +1,53 @@ +From b107e6ec2a5a34e0ba95345a89dcf5f505ad9da4 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 22 Feb 2021 10:13:55 -0500 +Subject: [PATCH] hw/vfio/common: trace vfio_connect_container operations + +We currently trace vfio_disconnect_container() but we do not trace +the container <-> group creation, which can be useful to understand +the VFIO topology. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 3 +++ + hw/vfio/trace-events | 2 ++ + 2 files changed, 5 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 206fb83e28..fefa2ccfdf 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1848,6 +1848,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + QLIST_FOREACH(container, &space->containers, next) { + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + group->container = container; ++ trace_vfio_connect_existing_container(group->groupid, ++ container->fd); + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + vfio_kvm_device_add_group(group); + return 0; +@@ -1881,6 +1883,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + if (ret) { + goto free_container_exit; + } ++ trace_vfio_connect_new_container(group->groupid, container->fd); + + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 575ebde6e0..561dc6e758 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -102,6 +102,8 @@ vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t si + vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_disconnect_container(int fd) "close container->fd=%d" ++vfio_connect_existing_container(int groupid, int container_fd) "group=%d existing container fd=%d" ++vfio_connect_new_container(int groupid, int container_fd) "group=%d new container fd=%d" + vfio_put_group(int fd) "close group->fd=%d" + vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u" + vfio_put_base_device(int fd) "close vdev->fd=%d" +-- +2.27.0 + diff --git 
a/hw-xhci-check-return-value-of-usb_packet_map.patch b/hw-xhci-check-return-value-of-usb_packet_map.patch new file mode 100644 index 0000000000000000000000000000000000000000..fd81478de3a588852232349f483bbf16dd403034 --- /dev/null +++ b/hw-xhci-check-return-value-of-usb_packet_map.patch @@ -0,0 +1,31 @@ +From e43f0019b0aff881c562c8d2428bce6b3d55845c Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Fri, 18 Sep 2020 11:08:28 +0800 +Subject: [PATCH] hw: xhci: check return value of 'usb_packet_map' + +Currently we don't check the return value of 'usb_packet_map', +this will cause a use-after-free (UAF) issue. This is LP#1891341. +Following is the reproducer provided in: +-->https://bugs.launchpad.net/qemu/+bug/1891341 + +This patch fixes this. + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index a21485fe..3b25abca 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -1614,7 +1614,10 @@ static int xhci_setup_packet(XHCITransfer *xfer) + xhci_xfer_create_sgl(xfer, dir == USB_TOKEN_IN); /* Also sets int_req */ + usb_packet_setup(&xfer->packet, dir, ep, xfer->streamid, + xfer->trbs[0].addr, false, xfer->int_req); +- usb_packet_map(&xfer->packet, &xfer->sgl); ++ if (usb_packet_map(&xfer->packet, &xfer->sgl)) { ++ qemu_sglist_destroy(&xfer->sgl); ++ return -1; ++ } + DPRINTF("xhci: setup packet pid 0x%x addr %d ep %d\n", + xfer->packet.pid, ep->dev->addr, ep->nr); + return 0; +-- +2.23.0 + diff --git a/i386-Add-CPUID-bit-for-CLZERO-and-XSAVEERPTR.patch b/i386-Add-CPUID-bit-for-CLZERO-and-XSAVEERPTR.patch new file mode 100644 index 0000000000000000000000000000000000000000..dfa4a7064590348b9353dee3515e9682d926cd3d --- /dev/null +++ b/i386-Add-CPUID-bit-for-CLZERO-and-XSAVEERPTR.patch @@ -0,0 +1,44 @@ +From a6206163d42156cb9de290f914c6883c77b012b9 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Wed, 25 Sep 2019 23:49:48 +0200 +Subject: [PATCH] i386: Add CPUID bit for CLZERO and XSAVEERPTR + +The CPUID bits CLZERO and XSAVEERPTR are available on AMD's ZEN platform
+and could be passed to the guest. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 2 +- + target/i386/cpu.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f09612f9da..e65f372f25 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1134,7 +1134,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + [FEAT_8000_0008_EBX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +- NULL, NULL, NULL, NULL, ++ "clzero", NULL, "xsaveerptr", NULL, + NULL, NULL, NULL, NULL, + NULL, "wbnoinvd", NULL, NULL, + "ibpb", NULL, NULL, NULL, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 7ff8ddd464..24d489db0f 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -696,6 +696,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; + + #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) /* AVX512 BFloat16 Instruction */ + ++#define CPUID_8000_0008_EBX_CLZERO (1U << 0) /* CLZERO instruction */ ++#define CPUID_8000_0008_EBX_XSAVEERPTR (1U << 2) /* Always save/restore FP error pointers */ + #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) /* Write back and + do not invalidate cache */ + #define CPUID_8000_0008_EBX_IBPB (1U << 12) /* Indirect Branch Prediction Barrier */ +-- +2.27.0 + diff --git a/i386-Add-MSR-feature-bit-for-MDS-NO.patch b/i386-Add-MSR-feature-bit-for-MDS-NO.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a24836e3bf8e9084dea784deec9f75b50f96633 --- /dev/null +++ b/i386-Add-MSR-feature-bit-for-MDS-NO.patch @@ -0,0 +1,34 @@ +From aaa6c86f46232c68f6846b2da859e4e0b8198664 Mon Sep 17 00:00:00 2001 +From: Cathy Zhang +Date: Tue, 22 Oct 2019 15:35:26 +0800 +Subject: [PATCH] i386: Add MSR feature bit for MDS-NO + +Define MSR_ARCH_CAP_MDS_NO in the IA32_ARCH_CAPABILITIES MSR to allow +CPU models to report the feature when host supports it. 
+ +Signed-off-by: Cathy Zhang +Reviewed-by: Xiaoyao Li +Reviewed-by: Tao Xu +Message-Id: <1571729728-23284-2-git-send-email-cathy.zhang@intel.com> +Signed-off-by: Eduardo Habkost + +Signed-off-by: Jingyi Wang +--- + target/i386/cpu.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 488b4dc778..9ef868eb71 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -747,6 +747,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; + #define MSR_ARCH_CAP_RSBA (1U << 2) + #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) + #define MSR_ARCH_CAP_SSB_NO (1U << 4) ++#define MSR_ARCH_CAP_MDS_NO (1U << 5) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +2.27.0 + diff --git a/i386-Add-macro-for-stibp.patch b/i386-Add-macro-for-stibp.patch new file mode 100644 index 0000000000000000000000000000000000000000..bf53f56757197ffdc94f388b01800a82d32aed4a --- /dev/null +++ b/i386-Add-macro-for-stibp.patch @@ -0,0 +1,36 @@ +From 67f68f735af6b1ba829689af2e021bba97e7132a Mon Sep 17 00:00:00 2001 +From: Cathy Zhang +Date: Tue, 22 Oct 2019 15:35:27 +0800 +Subject: [PATCH] i386: Add macro for stibp + +stibp feature is already added through the following commit. +https://github.com/qemu/qemu/commit/0e8916582991b9fd0b94850a8444b8b80d0a0955 + +Add a macro for it to allow CPU models to report it when host supports. 
+ +Signed-off-by: Cathy Zhang +Reviewed-by: Xiaoyao Li +Reviewed-by: Tao Xu +Message-Id: <1571729728-23284-3-git-send-email-cathy.zhang@intel.com> +Signed-off-by: Eduardo Habkost + +Signed-off-by: Jingyi Wang +--- + target/i386/cpu.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 9ef868eb71..58d8c48964 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -689,6 +689,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ + #define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Speculation Control */ ++#define CPUID_7_0_EDX_STIBP (1U << 27) /* Single Thread Indirect Branch Predictors */ + #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) /*Arch Capabilities*/ + #define CPUID_7_0_EDX_CORE_CAPABILITY (1U << 30) /*Core Capability*/ + #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */ +-- +2.27.0 + diff --git a/i386-Add-new-CPU-model-Cooperlake.patch b/i386-Add-new-CPU-model-Cooperlake.patch new file mode 100644 index 0000000000000000000000000000000000000000..60d249fdcd743a6c57c3d1c7051de82c0e1fefe4 --- /dev/null +++ b/i386-Add-new-CPU-model-Cooperlake.patch @@ -0,0 +1,96 @@ +From 8e9eb2f71396e3293d9ba9b1cfaf5f1487f1d475 Mon Sep 17 00:00:00 2001 +From: Cathy Zhang +Date: Tue, 22 Oct 2019 15:35:28 +0800 +Subject: [PATCH] i386: Add new CPU model Cooperlake + +Cooper Lake is intel's successor to Cascade Lake, the new +CPU model inherits features from Cascadelake-Server, while +add one platform associated new feature: AVX512_BF16. Meanwhile, +add STIBP for speculative execution. 
+ +Signed-off-by: Cathy Zhang +Reviewed-by: Xiaoyao Li +Reviewed-by: Tao Xu +Message-Id: <1571729728-23284-4-git-send-email-cathy.zhang@intel.com> +Reviewed-by: Bruce Rogers +Signed-off-by: Eduardo Habkost + +Signed-off-by: Jingyi Wang +--- + target/i386/cpu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 60 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 1ade90c28b..5329d73316 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2378,6 +2378,66 @@ static X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + } + }, ++ { ++ .name = "Cooperlake", ++ .level = 0xd, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 85, ++ .stepping = 10, ++ .features[FEAT_1_EDX] = ++ CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | ++ CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | ++ CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | ++ CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | ++ CPUID_DE | CPUID_FP87, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | ++ CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | ++ CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | ++ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | ++ CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | ++ CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | ++ CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | ++ CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | ++ CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | ++ CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_AVX512F | 
CPUID_7_0_EBX_AVX512DQ | ++ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | ++ CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_PKU | ++ CPUID_7_0_ECX_AVX512VNNI, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_STIBP | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX512_BF16, ++ /* ++ * Missing: XSAVES (not supported by some Linux versions, ++ * including v4.1 to v4.12). ++ * KVM doesn't yet expose any XSAVES state save component, ++ * and the only one defined in Skylake (processor tracing) ++ * probably will block migration anyway. ++ */ ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (Cooperlake)", ++ }, + { + .name = "Icelake-Client", + .level = 0xd, +-- +2.27.0 + diff --git a/i386-Resolve-CPU-models-to-v1-by-default.patch b/i386-Resolve-CPU-models-to-v1-by-default.patch new file mode 100644 index 0000000000000000000000000000000000000000..f8c6315866df7028a1135869a7aa44234eb07b44 --- /dev/null +++ b/i386-Resolve-CPU-models-to-v1-by-default.patch @@ -0,0 +1,81 @@ +From 6a5e994c1dec959143f6d3f83169a7adcb173fc4 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Thu, 5 Dec 2019 19:33:39 -0300 +Subject: [PATCH] i386: Resolve CPU models to v1 by default + +When using `query-cpu-definitions` using `-machine none`, +QEMU is resolving all CPU models to their latest versions. The +actual CPU model version being used by another machine type (e.g. +`pc-q35-4.0`) might be different. + +In theory, this was OK because the correct CPU model +version is returned when using the correct `-machine` argument. 
+ +Except that in practice, this breaks libvirt expectations: +libvirt always use `-machine none` when checking if a CPU model +is runnable, because runnability is not expected to be affected +when the machine type is changed. + +For example, when running on a Haswell host without TSX, +Haswell-v4 is runnable, but Haswell-v1 is not. On those hosts, +`query-cpu-definitions` says Haswell is runnable if using +`-machine none`, but Haswell is actually not runnable using any +of the `pc-*` machine types (because they resolve Haswell to +Haswell-v1). In other words, we're breaking the "runnability +guarantee" we promised to not break for a few releases (see +qemu-deprecated.texi). + +To address this issue, change the default CPU model version to v1 +on all machine types, so we make `query-cpu-definitions` output +when using `-machine none` match the results when using `pc-*`. +This will change in the future (the plan is to always return the +latest CPU model version if using `-machine none`), but only +after giving libvirt the opportunity to adapt. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1779078 +Signed-off-by: Eduardo Habkost +Message-Id: <20191205223339.764534-1-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +--- + qemu-deprecated.texi | 8 ++++++++ + target/i386/cpu.c | 8 +++++++- + 2 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/qemu-deprecated.texi b/qemu-deprecated.texi +index fff07bb2a3..719ac23d72 100644 +--- a/qemu-deprecated.texi ++++ b/qemu-deprecated.texi +@@ -331,3 +331,11 @@ existing CPU models. Management software that needs runnability + guarantees must resolve the CPU model aliases using te + ``alias-of'' field returned by the ``query-cpu-definitions'' QMP + command. ++ ++While those guarantees are kept, the return value of ++``query-cpu-definitions'' will have existing CPU model aliases ++point to a version that doesn't break runnability guarantees ++(specifically, version 1 of those CPU models). 
In future QEMU +versions, aliases will point to newer CPU model versions +depending on the machine type, so management software must +resolve CPU model aliases before starting a virtual machine. +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index e0f3a2dd99..22e0e89718 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3933,7 +3933,13 @@ static PropValue tcg_default_props[] = { + }; + + +-X86CPUVersion default_cpu_version = CPU_VERSION_LATEST; ++/* ++ * We resolve CPU model aliases using -v1 when using "-machine ++ * none", but this is just for compatibility while libvirt isn't ++ * adapted to resolve CPU model versions before creating VMs. ++ * See "Runnability guarantee of CPU models" at * qemu-deprecated.texi. ++ */ ++X86CPUVersion default_cpu_version = 1; + + void x86_cpu_set_default_version(X86CPUVersion version) + { +-- +2.27.0 + diff --git a/i386-cpu-Don-t-add-unavailable_features-to-env-user_.patch b/i386-cpu-Don-t-add-unavailable_features-to-env-user_.patch new file mode 100644 index 0000000000000000000000000000000000000000..3e42e71e5d9e2d038d4540fa0b57b87adcbf92d7 --- /dev/null +++ b/i386-cpu-Don-t-add-unavailable_features-to-env-user_.patch @@ -0,0 +1,33 @@ +From e6f3e08acd55d13cbb154ff8abb1b3c2ed658285 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Tue, 14 Jul 2020 01:44:36 +0800 +Subject: [PATCH] i386/cpu: Don't add unavailable_features to + env->user_features + +Features unavailable due to absence of their dependent features should +not be added to env->user_features. env->user_features only contains the +feature explicitly specified with -feature/+feature by user. 
+ +Fixes: 99e24dbdaa68 ("target/i386: introduce generic feature dependency mechanism") +Signed-off-by: Xiaoyao Li +Message-Id: <20200713174436.41070-3-xiaoyao.li@intel.com> +Signed-off-by: Eduardo Habkost +--- + target/i386/cpu.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6f27a5170a..e0f3a2dd99 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6173,7 +6173,6 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + unavailable_features & env->user_features[d->to.index], + "This feature depends on other features that were not requested"); + +- env->user_features[d->to.index] |= unavailable_features; + env->features[d->to.index] &= ~unavailable_features; + } + } +-- +2.27.0 + diff --git a/ide-Fix-incorrect-handling-of-some-PRDTs-in-ide_dma_.patch b/ide-Fix-incorrect-handling-of-some-PRDTs-in-ide_dma_.patch new file mode 100644 index 0000000000000000000000000000000000000000..9570b46b755e06705212253195a6605d738db350 --- /dev/null +++ b/ide-Fix-incorrect-handling-of-some-PRDTs-in-ide_dma_.patch @@ -0,0 +1,89 @@ +From ed78352a59ea7acf7520d4d47a96b9911bae7fc3 Mon Sep 17 00:00:00 2001 +From: Alexander Popov +Date: Mon, 23 Dec 2019 20:51:16 +0300 +Subject: [PATCH] ide: Fix incorrect handling of some PRDTs in ide_dma_cb() + +The commit a718978ed58a from July 2015 introduced the assertion which +implies that the size of successful DMA transfers handled in ide_dma_cb() +should be multiple of 512 (the size of a sector). But guest systems can +initiate DMA transfers that don't fit this requirement. + +For fixing that let's check the number of bytes prepared for the transfer +by the prepare_buf() handler. The code in ide_dma_cb() must behave +according to the Programming Interface for Bus Master IDE Controller +(Revision 1.0 5/16/94): +1. If PRDs specified a smaller size than the IDE transfer + size, then the Interrupt and Active bits in the Controller + status register are not set (Error Condition). 
+2. If the size of the physical memory regions was equal to + the IDE device transfer size, the Interrupt bit in the + Controller status register is set to 1, Active bit is set to 0. +3. If PRDs specified a larger size than the IDE transfer size, + the Interrupt and Active bits in the Controller status register + are both set to 1. + +Signed-off-by: Alexander Popov +Reviewed-by: Kevin Wolf +Message-id: 20191223175117.508990-2-alex.popov@linux.com +Signed-off-by: John Snow + +diff --git a/hw/ide/core.c b/hw/ide/core.c +index 754ff4dc34..80000eb766 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -849,6 +849,7 @@ static void ide_dma_cb(void *opaque, int ret) + int64_t sector_num; + uint64_t offset; + bool stay_active = false; ++ int32_t prep_size = 0; + + if (ret == -EINVAL) { + ide_dma_error(s); +@@ -863,13 +864,15 @@ static void ide_dma_cb(void *opaque, int ret) + } + } + +- n = s->io_buffer_size >> 9; +- if (n > s->nsector) { +- /* The PRDs were longer than needed for this request. Shorten them so +- * we don't get a negative remainder. The Active bit must remain set +- * after the request completes. */ ++ if (s->io_buffer_size > s->nsector * 512) { ++ /* ++ * The PRDs were longer than needed for this request. ++ * The Active bit must remain set after the request completes. ++ */ + n = s->nsector; + stay_active = true; ++ } else { ++ n = s->io_buffer_size >> 9; + } + + sector_num = ide_get_sector(s); +@@ -892,9 +895,20 @@ static void ide_dma_cb(void *opaque, int ret) + n = s->nsector; + s->io_buffer_index = 0; + s->io_buffer_size = n * 512; +- if (s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size) < 512) { +- /* The PRDs were too short. Reset the Active bit, but don't raise an +- * interrupt. 
*/ ++ prep_size = s->bus->dma->ops->prepare_buf(s->bus->dma, s->io_buffer_size); ++ /* prepare_buf() must succeed and respect the limit */ ++ assert(prep_size >= 0 && prep_size <= n * 512); ++ ++ /* ++ * Now prep_size stores the number of bytes in the sglist, and ++ * s->io_buffer_size stores the number of bytes described by the PRDs. ++ */ ++ ++ if (prep_size < n * 512) { ++ /* ++ * The PRDs are too short for this request. Error condition! ++ * Reset the Active bit and don't raise the interrupt. ++ */ + s->status = READY_STAT | SEEK_STAT; + dma_buf_commit(s, 0); + goto eot; +-- +2.23.0 + diff --git a/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch b/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch new file mode 100644 index 0000000000000000000000000000000000000000..97824d4d27236961e5a9c3e3f4242e73d1334d44 --- /dev/null +++ b/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch @@ -0,0 +1,40 @@ +From c7fd5f3841f14c24e442fb6968c9f2d9e016f28a Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Mon, 21 Jun 2021 09:22:35 +0800 +Subject: [PATCH] ide: ahci: add check to avoid null dereference + (CVE-2019-12067) + +Fix CVE-2019-12067 + +AHCI emulator while committing DMA buffer in ahci_commit_buf() +may do a NULL dereference if the command header 'ad->cur_cmd' +is null. Add check to avoid it. 
+ +Reported-by: Bugs SysSec +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/ide/ahci.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index 6aaf66534a..a7be0ae4fe 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1455,8 +1455,10 @@ static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes) + { + AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); + +- tx_bytes += le32_to_cpu(ad->cur_cmd->status); +- ad->cur_cmd->status = cpu_to_le32(tx_bytes); ++ if (ad->cur_cmd) { ++ tx_bytes += le32_to_cpu(ad->cur_cmd->status); ++ ad->cur_cmd->status = cpu_to_le32(tx_bytes); ++ } + } + + static int ahci_dma_rw_buf(IDEDMA *dma, int is_write) +-- +2.27.0 + diff --git a/ide-atapi-check-io_buffer_index-in-ide_atapi_cmd_rep.patch b/ide-atapi-check-io_buffer_index-in-ide_atapi_cmd_rep.patch new file mode 100644 index 0000000000000000000000000000000000000000..da58bb9cc28d6e193d7b55ba530768d69a04324a --- /dev/null +++ b/ide-atapi-check-io_buffer_index-in-ide_atapi_cmd_rep.patch @@ -0,0 +1,52 @@ +From 5209fbd340efe3fa7f8ea82f671db2fa04dda19b Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Tue, 23 Feb 2021 15:20:03 +0800 +Subject: [PATCH] ide:atapi: check io_buffer_index in ide_atapi_cmd_reply_end + +Fix CVE-2020-29443 + +During data transfer via packet command in 'ide_atapi_cmd_reply_end' +'s->io_buffer_index' could exceed the 's->io_buffer' length, leading +to OOB access issue. Add check to avoid it. + ... 
+ #9 ahci_pio_transfer ../hw/ide/ahci.c:1383 + #10 ide_transfer_start_norecurse ../hw/ide/core.c:553 + #11 ide_atapi_cmd_reply_end ../hw/ide/atapi.c:284 + #12 ide_atapi_cmd_read_pio ../hw/ide/atapi.c:329 + #13 ide_atapi_cmd_read ../hw/ide/atapi.c:442 + #14 cmd_read ../hw/ide/atapi.c:988 + #15 ide_atapi_cmd ../hw/ide/atapi.c:1352 + #16 ide_transfer_start ../hw/ide/core.c:561 + #17 cmd_packet ../hw/ide/core.c:1729 + #18 ide_exec_cmd ../hw/ide/core.c:2107 + #19 handle_reg_h2d_fis ../hw/ide/ahci.c:1267 + #20 handle_cmd ../hw/ide/ahci.c:1318 + #21 check_cmd ../hw/ide/ahci.c:592 + #22 ahci_port_write ../hw/ide/ahci.c:373 + #23 ahci_mem_write ../hw/ide/ahci.c:513 + +Reported-by: Wenxiang Qian +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/ide/atapi.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c +index 1b0f66cc08..fc9dc87f03 100644 +--- a/hw/ide/atapi.c ++++ b/hw/ide/atapi.c +@@ -300,6 +300,9 @@ void ide_atapi_cmd_reply_end(IDEState *s) + s->packet_transfer_size -= size; + s->elementary_transfer_size -= size; + s->io_buffer_index += size; ++ if (s->io_buffer_index > s->io_buffer_total_len) { ++ return; ++ } + + /* Some adapters process PIO data right away. In that case, we need + * to avoid mutual recursion between ide_transfer_start +-- +2.27.0 + diff --git a/ide-fix-leak-from-qemu_allocate_irqs.patch b/ide-fix-leak-from-qemu_allocate_irqs.patch new file mode 100644 index 0000000000000000000000000000000000000000..dce6e906ce92e7e303d2198d7a612905ca0632c1 --- /dev/null +++ b/ide-fix-leak-from-qemu_allocate_irqs.patch @@ -0,0 +1,28 @@ +From df35f8fe2687df32cb65f6a03b8dd80314cc4c53 Mon Sep 17 00:00:00 2001 +From: lizhengui +Date: Wed, 9 Sep 2020 15:00:08 +0800 +Subject: [PATCH] ide: fix leak from qemu_allocate_irqs + +The array returned by qemu_allocate_irqs is malloced, free it. 
+ +Signed-off-by: Paolo Bonzini +Reviewed-by: Thomas Huth +--- + hw/ide/cmd646.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c +index ed23aabf..a149cd6c 100644 +--- a/hw/ide/cmd646.c ++++ b/hw/ide/cmd646.c +@@ -299,6 +299,7 @@ static void pci_cmd646_ide_realize(PCIDevice *dev, Error **errp) + d->bmdma[i].bus = &d->bus[i]; + ide_register_restart_cb(&d->bus[i]); + } ++ g_free(irq); + + vmstate_register(DEVICE(dev), 0, &vmstate_ide_pci, d); + qemu_register_reset(cmd646_reset, d); +-- +2.19.1 + diff --git a/imx7-ccm-add-digprog-mmio-write-method.patch b/imx7-ccm-add-digprog-mmio-write-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..b68bf028b9b3e9bc2fe2d1838f6d3cf64dce7866 --- /dev/null +++ b/imx7-ccm-add-digprog-mmio-write-method.patch @@ -0,0 +1,41 @@ +From 5979338f8fb4562f7af32c58b7e7542d7396954e Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Mar 2021 17:29:28 +0800 +Subject: [PATCH] imx7-ccm: add digprog mmio write method + +fix CVE-2020-15469 + +Add digprog mmio write method to avoid assert failure during +initialisation. 
+ +Reviewed-by: Li Qiang +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/misc/imx7_ccm.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/misc/imx7_ccm.c b/hw/misc/imx7_ccm.c +index d9bdcf1027..831311a7c8 100644 +--- a/hw/misc/imx7_ccm.c ++++ b/hw/misc/imx7_ccm.c +@@ -130,8 +130,15 @@ static const struct MemoryRegionOps imx7_set_clr_tog_ops = { + }, + }; + ++static void imx7_digprog_write(void *opaque, hwaddr addr, ++ uint64_t data, unsigned size) ++{ ++ qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); ++} ++ + static const struct MemoryRegionOps imx7_digprog_ops = { + .read = imx7_set_clr_tog_read, ++ .write = imx7_digprog_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, +-- +2.27.0 + diff --git a/include-Make-headers-more-self-contained.patch b/include-Make-headers-more-self-contained.patch new file mode 100644 index 0000000000000000000000000000000000000000..565471c8ce67ec70b0bb5691f66cba384b8a1202 --- /dev/null +++ b/include-Make-headers-more-self-contained.patch @@ -0,0 +1,1551 @@ +From 1b6a1ef572411efee7cbf1b65aeb15c704b997cc Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Mon, 12 Aug 2019 07:23:31 +0200 +Subject: [PATCH] include: Make headers more self-contained +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Back in 2016, we discussed[1] rules for headers, and these were +generally liked: + +1. Have a carefully curated header that's included everywhere first. We + got that already thanks to Peter: osdep.h. + +2. Headers should normally include everything they need beyond osdep.h. + If exceptions are needed for some reason, they must be documented in + the header. If all that's needed from a header is typedefs, put + those into qemu/typedefs.h instead of including the header. + +3. Cyclic inclusion is forbidden. + +This patch gets include/ closer to obeying 2. 
+ +It's actually extracted from my "[RFC] Baby steps towards saner +headers" series[2], which demonstrates a possible path towards +checking 2 automatically. It passes the RFC test there. + +[1] Message-ID: <87h9g8j57d.fsf@blackfin.pond.sub.org> + https://lists.nongnu.org/archive/html/qemu-devel/2016-03/msg03345.html +[2] Message-Id: <20190711122827.18970-1-armbru@redhat.com> + https://lists.nongnu.org/archive/html/qemu-devel/2019-07/msg02715.html + +Signed-off-by: Markus Armbruster +Reviewed-by: Alistair Francis +Message-Id: <20190812052359.30071-2-armbru@redhat.com> +Tested-by: Philippe Mathieu-Daudé +--- + include/block/raw-aio.h | 2 ++ + include/block/write-threshold.h | 2 ++ + include/disas/disas.h | 1 + + include/exec/cputlb.h | 3 +++ + include/exec/exec-all.h | 1 + + include/exec/ioport.h | 2 ++ + include/exec/memory-internal.h | 2 ++ + include/exec/ram_addr.h | 1 + + include/exec/softmmu-semi.h | 2 ++ + include/exec/tb-hash.h | 2 ++ + include/exec/user/thunk.h | 2 ++ + include/fpu/softfloat-macros.h | 2 ++ + include/hw/acpi/pci.h | 3 +++ + include/hw/acpi/tco.h | 3 +++ + include/hw/adc/stm32f2xx_adc.h | 2 ++ + include/hw/arm/allwinner-a10.h | 1 + + include/hw/arm/aspeed_soc.h | 1 + + include/hw/arm/bcm2836.h | 1 + + include/hw/arm/exynos4210.h | 3 +-- + include/hw/arm/fsl-imx25.h | 1 + + include/hw/arm/fsl-imx31.h | 1 + + include/hw/arm/sharpsl.h | 3 +++ + include/hw/arm/xlnx-zynqmp.h | 1 + + include/hw/block/fdc.h | 2 ++ + include/hw/block/flash.h | 1 + + include/hw/char/escc.h | 1 + + include/hw/char/xilinx_uartlite.h | 2 ++ + include/hw/core/generic-loader.h | 1 + + include/hw/cris/etraxfs.h | 1 + + include/hw/cris/etraxfs_dma.h | 3 +++ + include/hw/display/i2c-ddc.h | 1 + + include/hw/empty_slot.h | 2 ++ + include/hw/gpio/bcm2835_gpio.h | 1 + + include/hw/i2c/aspeed_i2c.h | 2 ++ + include/hw/i386/apic_internal.h | 1 + + include/hw/i386/ioapic_internal.h | 1 + + include/hw/intc/allwinner-a10-pic.h | 2 ++ + include/hw/intc/heathrow_pic.h | 2 ++ + 
include/hw/intc/mips_gic.h | 1 + + include/hw/isa/vt82c686.h | 2 ++ + include/hw/mips/cps.h | 1 + + include/hw/misc/macio/cuda.h | 2 ++ + include/hw/misc/macio/gpio.h | 3 +++ + include/hw/misc/macio/macio.h | 2 ++ + include/hw/misc/macio/pmu.h | 3 +++ + include/hw/misc/mips_cmgcr.h | 2 ++ + include/hw/misc/mips_cpc.h | 2 ++ + include/hw/misc/pvpanic.h | 3 +++ + include/hw/net/allwinner_emac.h | 1 + + include/hw/net/lance.h | 1 + + include/hw/nvram/chrp_nvram.h | 2 ++ + include/hw/pci-host/sabre.h | 2 ++ + include/hw/pci-host/uninorth.h | 2 +- + include/hw/pci/pcie_aer.h | 1 + + include/hw/ppc/pnv_core.h | 1 + + include/hw/ppc/ppc4xx.h | 4 ++++ + include/hw/ppc/spapr_irq.h | 3 +++ + include/hw/ppc/spapr_vio.h | 1 + + include/hw/ppc/spapr_xive.h | 2 ++ + include/hw/ppc/xive_regs.h | 3 +++ + include/hw/riscv/boot.h | 2 ++ + include/hw/riscv/riscv_hart.h | 3 +++ + include/hw/riscv/sifive_clint.h | 2 ++ + include/hw/riscv/sifive_e.h | 1 + + include/hw/riscv/sifive_plic.h | 2 +- + include/hw/riscv/sifive_prci.h | 2 ++ + include/hw/riscv/sifive_test.h | 2 ++ + include/hw/riscv/sifive_u.h | 1 + + include/hw/riscv/sifive_uart.h | 3 +++ + include/hw/riscv/spike.h | 3 +++ + include/hw/riscv/virt.h | 3 +++ + include/hw/s390x/ap-device.h | 3 +++ + include/hw/s390x/css-bridge.h | 3 ++- + include/hw/s390x/css.h | 1 + + include/hw/s390x/tod.h | 2 +- + include/hw/semihosting/console.h | 2 ++ + include/hw/sh4/sh_intc.h | 1 + + include/hw/sparc/sparc64.h | 2 ++ + include/hw/ssi/aspeed_smc.h | 1 + + include/hw/ssi/xilinx_spips.h | 1 + + include/hw/timer/allwinner-a10-pit.h | 1 + + include/hw/timer/i8254_internal.h | 1 + + include/hw/timer/m48t59.h | 2 ++ + include/hw/timer/mc146818rtc_regs.h | 2 ++ + include/hw/timer/xlnx-zynqmp-rtc.h | 1 + + include/hw/virtio/virtio-access.h | 1 + + include/hw/virtio/virtio-gpu-bswap.h | 1 + + include/hw/virtio/virtio-rng.h | 1 + + include/hw/watchdog/wdt_aspeed.h | 1 + + include/libdecnumber/decNumberLocal.h | 1 + + include/migration/cpu.h | 3 +++ + 
include/monitor/hmp-target.h | 2 ++ + include/qemu/atomic128.h | 2 ++ + include/qemu/ratelimit.h | 2 ++ + include/qemu/thread-win32.h | 2 +- + include/sysemu/balloon.h | 1 + + include/sysemu/cryptodev-vhost-user.h | 3 +++ + include/sysemu/hvf.h | 1 + + include/sysemu/iothread.h | 1 + + include/sysemu/kvm_int.h | 2 ++ + include/sysemu/memory_mapping.h | 2 ++ + include/sysemu/xen-mapcache.h | 2 ++ + include/ui/egl-helpers.h | 3 +++ + include/ui/input.h | 1 + + include/ui/spice-display.h | 1 + + target/hppa/cpu.h | 2 +- + 106 files changed, 183 insertions(+), 8 deletions(-) + +diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h +index 0cb7cc74a2..4629f24d08 100644 +--- a/include/block/raw-aio.h ++++ b/include/block/raw-aio.h +@@ -12,9 +12,11 @@ + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ ++ + #ifndef QEMU_RAW_AIO_H + #define QEMU_RAW_AIO_H + ++#include "block/aio.h" + #include "qemu/coroutine.h" + #include "qemu/iov.h" + +diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h +index 80d8aab5d0..c646f267a4 100644 +--- a/include/block/write-threshold.h ++++ b/include/block/write-threshold.h +@@ -9,9 +9,11 @@ + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ ++ + #ifndef BLOCK_WRITE_THRESHOLD_H + #define BLOCK_WRITE_THRESHOLD_H + ++#include "block/block_int.h" + + /* + * bdrv_write_threshold_set: +diff --git a/include/disas/disas.h b/include/disas/disas.h +index 15da511f49..ba47e9197c 100644 +--- a/include/disas/disas.h ++++ b/include/disas/disas.h +@@ -1,6 +1,7 @@ + #ifndef QEMU_DISAS_H + #define QEMU_DISAS_H + ++#include "exec/hwaddr.h" + + #ifdef NEED_CPU_H + #include "cpu.h" +diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h +index 5373188be3..a62cfb28d5 100644 +--- a/include/exec/cputlb.h ++++ b/include/exec/cputlb.h +@@ -16,9 +16,12 @@ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ ++ + #ifndef CPUTLB_H + #define CPUTLB_H + ++#include "exec/cpu-common.h" ++ + #if !defined(CONFIG_USER_ONLY) + /* cputlb.c */ + void tlb_protect_code(ram_addr_t ram_addr); +diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h +index 16034ee651..135aeaab0d 100644 +--- a/include/exec/exec-all.h ++++ b/include/exec/exec-all.h +@@ -20,6 +20,7 @@ + #ifndef EXEC_ALL_H + #define EXEC_ALL_H + ++#include "cpu.h" + #include "exec/tb-context.h" + #include "sysemu/cpus.h" + +diff --git a/include/exec/ioport.h b/include/exec/ioport.h +index a298b89ce1..97feb296d2 100644 +--- a/include/exec/ioport.h ++++ b/include/exec/ioport.h +@@ -24,6 +24,8 @@ + #ifndef IOPORT_H + #define IOPORT_H + ++#include "exec/memory.h" ++ + #define MAX_IOPORTS (64 * 1024) + #define IOPORTS_MASK (MAX_IOPORTS - 1) + +diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h +index d1a9dd1ec8..ef4fb92371 100644 +--- a/include/exec/memory-internal.h ++++ b/include/exec/memory-internal.h +@@ -20,6 +20,8 @@ + #ifndef MEMORY_INTERNAL_H + #define MEMORY_INTERNAL_H + ++#include "cpu.h" ++ + #ifndef CONFIG_USER_ONLY + static inline AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv) + { +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index 
523440662b..27a164b669 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -20,6 +20,7 @@ + #define RAM_ADDR_H + + #ifndef CONFIG_USER_ONLY ++#include "cpu.h" + #include "hw/xen/xen.h" + #include "sysemu/tcg.h" + #include "exec/ramlist.h" +diff --git a/include/exec/softmmu-semi.h b/include/exec/softmmu-semi.h +index 970837992e..fbcae88f4b 100644 +--- a/include/exec/softmmu-semi.h ++++ b/include/exec/softmmu-semi.h +@@ -10,6 +10,8 @@ + #ifndef SOFTMMU_SEMI_H + #define SOFTMMU_SEMI_H + ++#include "cpu.h" ++ + static inline uint64_t softmmu_tget64(CPUArchState *env, target_ulong addr) + { + uint64_t val; +diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h +index 4f3a37d927..805235d321 100644 +--- a/include/exec/tb-hash.h ++++ b/include/exec/tb-hash.h +@@ -20,6 +20,8 @@ + #ifndef EXEC_TB_HASH_H + #define EXEC_TB_HASH_H + ++#include "exec/cpu-defs.h" ++#include "exec/exec-all.h" + #include "qemu/xxhash.h" + + #ifdef CONFIG_SOFTMMU +diff --git a/include/exec/user/thunk.h b/include/exec/user/thunk.h +index 8d3af5a3be..eae2c27f99 100644 +--- a/include/exec/user/thunk.h ++++ b/include/exec/user/thunk.h +@@ -16,10 +16,12 @@ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ ++ + #ifndef THUNK_H + #define THUNK_H + + #include "cpu.h" ++#include "exec/user/abitypes.h" + + /* types enums definitions */ + +diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h +index c55aa6d174..be83a833ec 100644 +--- a/include/fpu/softfloat-macros.h ++++ b/include/fpu/softfloat-macros.h +@@ -82,6 +82,8 @@ this code that are retained. + #ifndef FPU_SOFTFLOAT_MACROS_H + #define FPU_SOFTFLOAT_MACROS_H + ++#include "fpu/softfloat.h" ++ + /*---------------------------------------------------------------------------- + | Shifts `a' right by the number of bits given in `count'. 
If any nonzero + | bits are shifted off, they are ``jammed'' into the least significant bit of +diff --git a/include/hw/acpi/pci.h b/include/hw/acpi/pci.h +index 8bbd32cf45..bf2a3ed0ba 100644 +--- a/include/hw/acpi/pci.h ++++ b/include/hw/acpi/pci.h +@@ -22,9 +22,12 @@ + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ ++ + #ifndef HW_ACPI_PCI_H + #define HW_ACPI_PCI_H + ++#include "hw/acpi/bios-linker-loader.h" ++ + typedef struct AcpiMcfgInfo { + uint64_t base; + uint32_t size; +diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/tco.h +index d19dd59353..726f840cce 100644 +--- a/include/hw/acpi/tco.h ++++ b/include/hw/acpi/tco.h +@@ -6,9 +6,12 @@ + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ ++ + #ifndef HW_ACPI_TCO_H + #define HW_ACPI_TCO_H + ++#include "exec/memory.h" ++#include "migration/vmstate.h" + + /* As per ICH9 spec, the internal timer has an error of ~0.6s on every tick */ + #define TCO_TICK_NSEC 600000000LL +diff --git a/include/hw/adc/stm32f2xx_adc.h b/include/hw/adc/stm32f2xx_adc.h +index a72f734eb1..663b79f4f3 100644 +--- a/include/hw/adc/stm32f2xx_adc.h ++++ b/include/hw/adc/stm32f2xx_adc.h +@@ -25,6 +25,8 @@ + #ifndef HW_STM32F2XX_ADC_H + #define HW_STM32F2XX_ADC_H + ++#include "hw/sysbus.h" ++ + #define ADC_SR 0x00 + #define ADC_CR1 0x04 + #define ADC_CR2 0x08 +diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h +index e99fe2ea2e..7182ce5c4b 100644 +--- a/include/hw/arm/allwinner-a10.h ++++ b/include/hw/arm/allwinner-a10.h +@@ -11,6 +11,7 @@ + #include "hw/ide/ahci.h" + + #include "sysemu/sysemu.h" ++#include "target/arm/cpu.h" + + + #define AW_A10_PIC_REG_BASE 0x01c20400 +diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h +index cef605ad6b..976fd6be93 100644 +--- a/include/hw/arm/aspeed_soc.h ++++ b/include/hw/arm/aspeed_soc.h +@@ -22,6 +22,7 @@ 
+ #include "hw/ssi/aspeed_smc.h" + #include "hw/watchdog/wdt_aspeed.h" + #include "hw/net/ftgmac100.h" ++#include "target/arm/cpu.h" + + #define ASPEED_SPIS_NUM 2 + #define ASPEED_WDTS_NUM 3 +diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h +index a2cb8454de..97187f72be 100644 +--- a/include/hw/arm/bcm2836.h ++++ b/include/hw/arm/bcm2836.h +@@ -13,6 +13,7 @@ + + #include "hw/arm/bcm2835_peripherals.h" + #include "hw/intc/bcm2836_control.h" ++#include "target/arm/cpu.h" + + #define TYPE_BCM283X "bcm283x" + #define BCM283X(obj) OBJECT_CHECK(BCM283XState, (obj), TYPE_BCM283X) +diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h +index aa137271c0..f0f23b0e9b 100644 +--- a/include/hw/arm/exynos4210.h ++++ b/include/hw/arm/exynos4210.h +@@ -19,13 +19,12 @@ + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . +- * + */ + + #ifndef EXYNOS4210_H + #define EXYNOS4210_H + +-#include "exec/memory.h" ++#include "hw/sysbus.h" + #include "target/arm/cpu-qom.h" + + #define EXYNOS4210_NCPUS 2 +diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h +index 3280ab1fb0..241efb52ae 100644 +--- a/include/hw/arm/fsl-imx25.h ++++ b/include/hw/arm/fsl-imx25.h +@@ -27,6 +27,7 @@ + #include "hw/i2c/imx_i2c.h" + #include "hw/gpio/imx_gpio.h" + #include "exec/memory.h" ++#include "target/arm/cpu.h" + + #define TYPE_FSL_IMX25 "fsl,imx25" + #define FSL_IMX25(obj) OBJECT_CHECK(FslIMX25State, (obj), TYPE_FSL_IMX25) +diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h +index e68a81efd7..ac5ca9826a 100644 +--- a/include/hw/arm/fsl-imx31.h ++++ b/include/hw/arm/fsl-imx31.h +@@ -26,6 +26,7 @@ + #include "hw/i2c/imx_i2c.h" + #include "hw/gpio/imx_gpio.h" + #include "exec/memory.h" ++#include "target/arm/cpu.h" + + #define TYPE_FSL_IMX31 "fsl,imx31" + #define FSL_IMX31(obj) OBJECT_CHECK(FslIMX31State, (obj), TYPE_FSL_IMX31) +diff --git a/include/hw/arm/sharpsl.h 
b/include/hw/arm/sharpsl.h +index 5bf6db1fa2..89e168fbff 100644 +--- a/include/hw/arm/sharpsl.h ++++ b/include/hw/arm/sharpsl.h +@@ -3,9 +3,12 @@ + * + * This file is licensed under the GNU GPL. + */ ++ + #ifndef QEMU_SHARPSL_H + #define QEMU_SHARPSL_H + ++#include "exec/hwaddr.h" ++ + #define zaurus_printf(format, ...) \ + fprintf(stderr, "%s: " format, __func__, ##__VA_ARGS__) + +diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h +index 35804ea80a..6cb65e7537 100644 +--- a/include/hw/arm/xlnx-zynqmp.h ++++ b/include/hw/arm/xlnx-zynqmp.h +@@ -32,6 +32,7 @@ + #include "hw/intc/xlnx-zynqmp-ipi.h" + #include "hw/timer/xlnx-zynqmp-rtc.h" + #include "hw/cpu/cluster.h" ++#include "target/arm/cpu.h" + + #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp" + #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \ +diff --git a/include/hw/block/fdc.h b/include/hw/block/fdc.h +index 8cece84326..f4fe2f471b 100644 +--- a/include/hw/block/fdc.h ++++ b/include/hw/block/fdc.h +@@ -1,6 +1,8 @@ + #ifndef HW_FDC_H + #define HW_FDC_H + ++#include "exec/hwaddr.h" ++#include "hw/irq.h" + #include "qapi/qapi-types-block.h" + + /* fdc.c */ +diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h +index 1acaf7de80..83a75f3170 100644 +--- a/include/hw/block/flash.h ++++ b/include/hw/block/flash.h +@@ -4,6 +4,7 @@ + /* NOR flash devices */ + + #include "exec/memory.h" ++#include "migration/vmstate.h" + + /* pflash_cfi01.c */ + +diff --git a/include/hw/char/escc.h b/include/hw/char/escc.h +index 42aca83611..d5196c53e6 100644 +--- a/include/hw/char/escc.h ++++ b/include/hw/char/escc.h +@@ -3,6 +3,7 @@ + + #include "chardev/char-fe.h" + #include "chardev/char-serial.h" ++#include "hw/sysbus.h" + #include "ui/input.h" + + /* escc.c */ +diff --git a/include/hw/char/xilinx_uartlite.h b/include/hw/char/xilinx_uartlite.h +index 634086b657..99d8bbf405 100644 +--- a/include/hw/char/xilinx_uartlite.h ++++ b/include/hw/char/xilinx_uartlite.h +@@ -15,6 +15,8 @@ + #ifndef 
XILINX_UARTLITE_H + #define XILINX_UARTLITE_H + ++#include "hw/sysbus.h" ++ + static inline DeviceState *xilinx_uartlite_create(hwaddr addr, + qemu_irq irq, + Chardev *chr) +diff --git a/include/hw/core/generic-loader.h b/include/hw/core/generic-loader.h +index dd27c42ab0..9ffce1c5a3 100644 +--- a/include/hw/core/generic-loader.h ++++ b/include/hw/core/generic-loader.h +@@ -19,6 +19,7 @@ + #define GENERIC_LOADER_H + + #include "elf.h" ++#include "hw/qdev-core.h" + + typedef struct GenericLoaderState { + /* */ +diff --git a/include/hw/cris/etraxfs.h b/include/hw/cris/etraxfs.h +index 8da965addb..494222d315 100644 +--- a/include/hw/cris/etraxfs.h ++++ b/include/hw/cris/etraxfs.h +@@ -27,6 +27,7 @@ + + #include "net/net.h" + #include "hw/cris/etraxfs_dma.h" ++#include "hw/sysbus.h" + + /* Instantiate an ETRAXFS Ethernet MAC. */ + static inline DeviceState * +diff --git a/include/hw/cris/etraxfs_dma.h b/include/hw/cris/etraxfs_dma.h +index f6f33e0980..31ae360611 100644 +--- a/include/hw/cris/etraxfs_dma.h ++++ b/include/hw/cris/etraxfs_dma.h +@@ -1,6 +1,9 @@ + #ifndef HW_ETRAXFS_DMA_H + #define HW_ETRAXFS_DMA_H + ++#include "exec/hwaddr.h" ++#include "hw/irq.h" ++ + struct dma_context_metadata { + /* data descriptor md */ + uint16_t metadata; +diff --git a/include/hw/display/i2c-ddc.h b/include/hw/display/i2c-ddc.h +index c29443c5af..1cf53a0c8d 100644 +--- a/include/hw/display/i2c-ddc.h ++++ b/include/hw/display/i2c-ddc.h +@@ -20,6 +20,7 @@ + #define I2C_DDC_H + + #include "hw/display/edid.h" ++#include "hw/i2c/i2c.h" + + /* A simple I2C slave which just returns the contents of its EDID blob. 
*/ + struct I2CDDCState { +diff --git a/include/hw/empty_slot.h b/include/hw/empty_slot.h +index 123a9f8989..cb9a221aa6 100644 +--- a/include/hw/empty_slot.h ++++ b/include/hw/empty_slot.h +@@ -1,6 +1,8 @@ + #ifndef HW_EMPTY_SLOT_H + #define HW_EMPTY_SLOT_H + ++#include "exec/hwaddr.h" ++ + /* empty_slot.c */ + void empty_slot_init(hwaddr addr, uint64_t slot_size); + +diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h +index 9f8e0c720c..b0de0a3c74 100644 +--- a/include/hw/gpio/bcm2835_gpio.h ++++ b/include/hw/gpio/bcm2835_gpio.h +@@ -15,6 +15,7 @@ + #define BCM2835_GPIO_H + + #include "hw/sd/sd.h" ++#include "hw/sysbus.h" + + typedef struct BCM2835GpioState { + SysBusDevice parent_obj; +diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h +index f9020acdef..a2753f0bbb 100644 +--- a/include/hw/i2c/aspeed_i2c.h ++++ b/include/hw/i2c/aspeed_i2c.h +@@ -17,10 +17,12 @@ + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ ++ + #ifndef ASPEED_I2C_H + #define ASPEED_I2C_H + + #include "hw/i2c/i2c.h" ++#include "hw/sysbus.h" + + #define TYPE_ASPEED_I2C "aspeed.i2c" + #define ASPEED_I2C(obj) \ +diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h +index 1209eb483a..b04bdd947f 100644 +--- a/include/hw/i386/apic_internal.h ++++ b/include/hw/i386/apic_internal.h +@@ -24,6 +24,7 @@ + #include "cpu.h" + #include "exec/memory.h" + #include "qemu/timer.h" ++#include "target/i386/cpu-qom.h" + + /* APIC Local Vector Table */ + #define APIC_LVT_TIMER 0 +diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h +index 07002f9662..3d2eec2aa7 100644 +--- a/include/hw/i386/ioapic_internal.h ++++ b/include/hw/i386/ioapic_internal.h +@@ -24,6 +24,7 @@ + + #include "hw/hw.h" + #include "exec/memory.h" ++#include "hw/i386/ioapic.h" + #include "hw/sysbus.h" + #include "qemu/notify.h" + +diff --git a/include/hw/intc/allwinner-a10-pic.h b/include/hw/intc/allwinner-a10-pic.h +index 1d314a70d9..a5895401d1 100644 +--- a/include/hw/intc/allwinner-a10-pic.h ++++ b/include/hw/intc/allwinner-a10-pic.h +@@ -1,6 +1,8 @@ + #ifndef ALLWINNER_A10_PIC_H + #define ALLWINNER_A10_PIC_H + ++#include "hw/sysbus.h" ++ + #define TYPE_AW_A10_PIC "allwinner-a10-pic" + #define AW_A10_PIC(obj) OBJECT_CHECK(AwA10PICState, (obj), TYPE_AW_A10_PIC) + +diff --git a/include/hw/intc/heathrow_pic.h b/include/hw/intc/heathrow_pic.h +index 6c91ec91bb..b163e27ab9 100644 +--- a/include/hw/intc/heathrow_pic.h ++++ b/include/hw/intc/heathrow_pic.h +@@ -26,6 +26,8 @@ + #ifndef HW_INTC_HEATHROW_PIC_H + #define HW_INTC_HEATHROW_PIC_H + ++#include "hw/sysbus.h" ++ + #define TYPE_HEATHROW "heathrow" + #define HEATHROW(obj) OBJECT_CHECK(HeathrowState, (obj), TYPE_HEATHROW) + +diff --git a/include/hw/intc/mips_gic.h b/include/hw/intc/mips_gic.h +index 902a12b178..8428287bf9 100644 +--- a/include/hw/intc/mips_gic.h ++++ b/include/hw/intc/mips_gic.h +@@ -13,6 +13,7 @@ + + #include "qemu/units.h" + 
#include "hw/timer/mips_gictimer.h" ++#include "hw/sysbus.h" + #include "cpu.h" + /* + * GIC Specific definitions +diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h +index c3c2b6e786..a54c3fe60a 100644 +--- a/include/hw/isa/vt82c686.h ++++ b/include/hw/isa/vt82c686.h +@@ -1,6 +1,8 @@ + #ifndef HW_VT82C686_H + #define HW_VT82C686_H + ++#include "hw/irq.h" ++ + #define TYPE_VT82C686B_SUPERIO "vt82c686b-superio" + + /* vt82c686.c */ +diff --git a/include/hw/mips/cps.h b/include/hw/mips/cps.h +index aab1af926d..a941c55f27 100644 +--- a/include/hw/mips/cps.h ++++ b/include/hw/mips/cps.h +@@ -25,6 +25,7 @@ + #include "hw/intc/mips_gic.h" + #include "hw/misc/mips_cpc.h" + #include "hw/misc/mips_itu.h" ++#include "target/mips/cpu.h" + + #define TYPE_MIPS_CPS "mips-cps" + #define MIPS_CPS(obj) OBJECT_CHECK(MIPSCPSState, (obj), TYPE_MIPS_CPS) +diff --git a/include/hw/misc/macio/cuda.h b/include/hw/misc/macio/cuda.h +index 7dad469142..5768075ac5 100644 +--- a/include/hw/misc/macio/cuda.h ++++ b/include/hw/misc/macio/cuda.h +@@ -26,6 +26,8 @@ + #ifndef CUDA_H + #define CUDA_H + ++#include "hw/misc/mos6522.h" ++ + /* CUDA commands (2nd byte) */ + #define CUDA_WARM_START 0x0 + #define CUDA_AUTOPOLL 0x1 +diff --git a/include/hw/misc/macio/gpio.h b/include/hw/misc/macio/gpio.h +index 2838ae5fde..24a4364b39 100644 +--- a/include/hw/misc/macio/gpio.h ++++ b/include/hw/misc/macio/gpio.h +@@ -26,6 +26,9 @@ + #ifndef MACIO_GPIO_H + #define MACIO_GPIO_H + ++#include "hw/ppc/openpic.h" ++#include "hw/sysbus.h" ++ + #define TYPE_MACIO_GPIO "macio-gpio" + #define MACIO_GPIO(obj) OBJECT_CHECK(MacIOGPIOState, (obj), TYPE_MACIO_GPIO) + +diff --git a/include/hw/misc/macio/macio.h b/include/hw/misc/macio/macio.h +index 970058b6ed..070a694eb5 100644 +--- a/include/hw/misc/macio/macio.h ++++ b/include/hw/misc/macio/macio.h +@@ -27,10 +27,12 @@ + #define MACIO_H + + #include "hw/char/escc.h" ++#include "hw/ide/internal.h" + #include "hw/intc/heathrow_pic.h" + #include 
"hw/misc/macio/cuda.h" + #include "hw/misc/macio/gpio.h" + #include "hw/misc/macio/pmu.h" ++#include "hw/ppc/mac.h" + #include "hw/ppc/mac_dbdma.h" + #include "hw/ppc/openpic.h" + +diff --git a/include/hw/misc/macio/pmu.h b/include/hw/misc/macio/pmu.h +index d10895ba5f..7ef83dee4c 100644 +--- a/include/hw/misc/macio/pmu.h ++++ b/include/hw/misc/macio/pmu.h +@@ -10,6 +10,9 @@ + #ifndef PMU_H + #define PMU_H + ++#include "hw/misc/mos6522.h" ++#include "hw/misc/macio/gpio.h" ++ + /* + * PMU commands + */ +diff --git a/include/hw/misc/mips_cmgcr.h b/include/hw/misc/mips_cmgcr.h +index c9dfcb4b84..3e6e223273 100644 +--- a/include/hw/misc/mips_cmgcr.h ++++ b/include/hw/misc/mips_cmgcr.h +@@ -10,6 +10,8 @@ + #ifndef MIPS_CMGCR_H + #define MIPS_CMGCR_H + ++#include "hw/sysbus.h" ++ + #define TYPE_MIPS_GCR "mips-gcr" + #define MIPS_GCR(obj) OBJECT_CHECK(MIPSGCRState, (obj), TYPE_MIPS_GCR) + +diff --git a/include/hw/misc/mips_cpc.h b/include/hw/misc/mips_cpc.h +index 72c834e039..3f670578b0 100644 +--- a/include/hw/misc/mips_cpc.h ++++ b/include/hw/misc/mips_cpc.h +@@ -20,6 +20,8 @@ + #ifndef MIPS_CPC_H + #define MIPS_CPC_H + ++#include "hw/sysbus.h" ++ + #define CPC_ADDRSPACE_SZ 0x6000 + + /* CPC blocks offsets relative to base address */ +diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h +index 1ee071a703..ae0c8188ce 100644 +--- a/include/hw/misc/pvpanic.h ++++ b/include/hw/misc/pvpanic.h +@@ -11,9 +11,12 @@ + * See the COPYING file in the top-level directory. 
+ * + */ ++ + #ifndef HW_MISC_PVPANIC_H + #define HW_MISC_PVPANIC_H + ++#include "qom/object.h" ++ + #define TYPE_PVPANIC "pvpanic" + + #define PVPANIC_IOPORT_PROP "ioport" +diff --git a/include/hw/net/allwinner_emac.h b/include/hw/net/allwinner_emac.h +index 905a43deb4..5013207d15 100644 +--- a/include/hw/net/allwinner_emac.h ++++ b/include/hw/net/allwinner_emac.h +@@ -27,6 +27,7 @@ + #include "net/net.h" + #include "qemu/fifo8.h" + #include "hw/net/mii.h" ++#include "hw/sysbus.h" + + #define TYPE_AW_EMAC "allwinner-emac" + #define AW_EMAC(obj) OBJECT_CHECK(AwEmacState, (obj), TYPE_AW_EMAC) +diff --git a/include/hw/net/lance.h b/include/hw/net/lance.h +index ffdd35c4d7..0357f5f65c 100644 +--- a/include/hw/net/lance.h ++++ b/include/hw/net/lance.h +@@ -31,6 +31,7 @@ + + #include "net/net.h" + #include "hw/net/pcnet.h" ++#include "hw/sysbus.h" + + #define TYPE_LANCE "lance" + #define SYSBUS_PCNET(obj) \ +diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h +index b4f5b2b104..09941a9be4 100644 +--- a/include/hw/nvram/chrp_nvram.h ++++ b/include/hw/nvram/chrp_nvram.h +@@ -18,6 +18,8 @@ + #ifndef CHRP_NVRAM_H + #define CHRP_NVRAM_H + ++#include "qemu/bswap.h" ++ + /* OpenBIOS NVRAM partition */ + typedef struct { + uint8_t signature; +diff --git a/include/hw/pci-host/sabre.h b/include/hw/pci-host/sabre.h +index 9afa4938fd..99b5aefbec 100644 +--- a/include/hw/pci-host/sabre.h ++++ b/include/hw/pci-host/sabre.h +@@ -1,6 +1,8 @@ + #ifndef HW_PCI_HOST_SABRE_H + #define HW_PCI_HOST_SABRE_H + ++#include "hw/pci/pci.h" ++#include "hw/pci/pci_host.h" + #include "hw/sparc/sun4u_iommu.h" + + #define MAX_IVEC 0x40 +diff --git a/include/hw/pci-host/uninorth.h b/include/hw/pci-host/uninorth.h +index 060324536a..9a5cabd4c5 100644 +--- a/include/hw/pci-host/uninorth.h ++++ b/include/hw/pci-host/uninorth.h +@@ -26,7 +26,7 @@ + #define UNINORTH_H + + #include "hw/hw.h" +- ++#include "hw/pci/pci_host.h" + #include "hw/ppc/openpic.h" + + /* UniNorth version */ +diff 
--git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h +index 729a9439c8..502dcd7eba 100644 +--- a/include/hw/pci/pcie_aer.h ++++ b/include/hw/pci/pcie_aer.h +@@ -22,6 +22,7 @@ + #define QEMU_PCIE_AER_H + + #include "hw/hw.h" ++#include "hw/pci/pci_regs.h" + + /* definitions which PCIExpressDevice uses */ + +diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h +index d0926454a9..bfbd2ec42a 100644 +--- a/include/hw/ppc/pnv_core.h ++++ b/include/hw/ppc/pnv_core.h +@@ -21,6 +21,7 @@ + #define PPC_PNV_CORE_H + + #include "hw/cpu/core.h" ++#include "target/ppc/cpu.h" + + #define TYPE_PNV_CORE "powernv-cpu-core" + #define PNV_CORE(obj) \ +diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h +index 39a7ba1ce6..90f8866138 100644 +--- a/include/hw/ppc/ppc4xx.h ++++ b/include/hw/ppc/ppc4xx.h +@@ -25,6 +25,10 @@ + #ifndef PPC4XX_H + #define PPC4XX_H + ++#include "hw/ppc/ppc.h" ++#include "exec/cpu-common.h" ++#include "exec/memory.h" ++ + /* PowerPC 4xx core initialization */ + PowerPCCPU *ppc4xx_init(const char *cpu_model, + clk_setup_t *cpu_clk, clk_setup_t *tb_clk, +diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h +index f965a58f89..cd6e18b05e 100644 +--- a/include/hw/ppc/spapr_irq.h ++++ b/include/hw/ppc/spapr_irq.h +@@ -10,6 +10,9 @@ + #ifndef HW_SPAPR_IRQ_H + #define HW_SPAPR_IRQ_H + ++#include "hw/irq.h" ++#include "target/ppc/cpu-qom.h" ++ + /* + * IRQ range offsets per device type + */ +diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h +index 97951fc6b4..92bfa72caf 100644 +--- a/include/hw/ppc/spapr_vio.h ++++ b/include/hw/ppc/spapr_vio.h +@@ -22,6 +22,7 @@ + * License along with this library; if not, see . 
+ */ + ++#include "hw/ppc/spapr.h" + #include "sysemu/dma.h" + + #define TYPE_VIO_SPAPR_DEVICE "vio-spapr-device" +diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h +index 7197144265..a39e672f27 100644 +--- a/include/hw/ppc/spapr_xive.h ++++ b/include/hw/ppc/spapr_xive.h +@@ -10,7 +10,9 @@ + #ifndef PPC_SPAPR_XIVE_H + #define PPC_SPAPR_XIVE_H + ++#include "hw/ppc/spapr_irq.h" + #include "hw/ppc/xive.h" ++#include "sysemu/sysemu.h" + + #define TYPE_SPAPR_XIVE "spapr-xive" + #define SPAPR_XIVE(obj) OBJECT_CHECK(SpaprXive, (obj), TYPE_SPAPR_XIVE) +diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h +index 1a8c5b5e64..b0c68ab5f7 100644 +--- a/include/hw/ppc/xive_regs.h ++++ b/include/hw/ppc/xive_regs.h +@@ -16,6 +16,9 @@ + #ifndef PPC_XIVE_REGS_H + #define PPC_XIVE_REGS_H + ++#include "qemu/bswap.h" ++#include "qemu/host-utils.h" ++ + /* + * Interrupt source number encoding on PowerBUS + */ +diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h +index d56f2ae3eb..1f21c2bef1 100644 +--- a/include/hw/riscv/boot.h ++++ b/include/hw/riscv/boot.h +@@ -20,6 +20,8 @@ + #ifndef RISCV_BOOT_H + #define RISCV_BOOT_H + ++#include "exec/cpu-defs.h" ++ + void riscv_find_and_load_firmware(MachineState *machine, + const char *default_machine_firmware, + hwaddr firmware_load_addr); +diff --git a/include/hw/riscv/riscv_hart.h b/include/hw/riscv/riscv_hart.h +index 0671d88a44..3b52b50571 100644 +--- a/include/hw/riscv/riscv_hart.h ++++ b/include/hw/riscv/riscv_hart.h +@@ -21,6 +21,9 @@ + #ifndef HW_RISCV_HART_H + #define HW_RISCV_HART_H + ++#include "hw/sysbus.h" ++#include "target/riscv/cpu.h" ++ + #define TYPE_RISCV_HART_ARRAY "riscv.hart_array" + + #define RISCV_HART_ARRAY(obj) \ +diff --git a/include/hw/riscv/sifive_clint.h b/include/hw/riscv/sifive_clint.h +index e2865be1d1..ae8286c884 100644 +--- a/include/hw/riscv/sifive_clint.h ++++ b/include/hw/riscv/sifive_clint.h +@@ -20,6 +20,8 @@ + #ifndef HW_SIFIVE_CLINT_H + #define 
HW_SIFIVE_CLINT_H + ++#include "hw/sysbus.h" ++ + #define TYPE_SIFIVE_CLINT "riscv.sifive.clint" + + #define SIFIVE_CLINT(obj) \ +diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h +index d175b24cb2..9c868dd7f9 100644 +--- a/include/hw/riscv/sifive_e.h ++++ b/include/hw/riscv/sifive_e.h +@@ -19,6 +19,7 @@ + #ifndef HW_SIFIVE_E_H + #define HW_SIFIVE_E_H + ++#include "hw/riscv/riscv_hart.h" + #include "hw/riscv/sifive_gpio.h" + + #define TYPE_RISCV_E_SOC "riscv.sifive.e.soc" +diff --git a/include/hw/riscv/sifive_plic.h b/include/hw/riscv/sifive_plic.h +index ce8907f6aa..b0edba2884 100644 +--- a/include/hw/riscv/sifive_plic.h ++++ b/include/hw/riscv/sifive_plic.h +@@ -21,7 +21,7 @@ + #ifndef HW_SIFIVE_PLIC_H + #define HW_SIFIVE_PLIC_H + +-#include "hw/irq.h" ++#include "hw/sysbus.h" + + #define TYPE_SIFIVE_PLIC "riscv.sifive.plic" + +diff --git a/include/hw/riscv/sifive_prci.h b/include/hw/riscv/sifive_prci.h +index bd51c4af3c..8b7de134f8 100644 +--- a/include/hw/riscv/sifive_prci.h ++++ b/include/hw/riscv/sifive_prci.h +@@ -19,6 +19,8 @@ + #ifndef HW_SIFIVE_PRCI_H + #define HW_SIFIVE_PRCI_H + ++#include "hw/sysbus.h" ++ + enum { + SIFIVE_PRCI_HFROSCCFG = 0x0, + SIFIVE_PRCI_HFXOSCCFG = 0x4, +diff --git a/include/hw/riscv/sifive_test.h b/include/hw/riscv/sifive_test.h +index 71d4c9fad7..3a603a6ead 100644 +--- a/include/hw/riscv/sifive_test.h ++++ b/include/hw/riscv/sifive_test.h +@@ -19,6 +19,8 @@ + #ifndef HW_SIFIVE_TEST_H + #define HW_SIFIVE_TEST_H + ++#include "hw/sysbus.h" ++ + #define TYPE_SIFIVE_TEST "riscv.sifive.test" + + #define SIFIVE_TEST(obj) \ +diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h +index 892f0eee21..be021ce256 100644 +--- a/include/hw/riscv/sifive_u.h ++++ b/include/hw/riscv/sifive_u.h +@@ -20,6 +20,7 @@ + #define HW_SIFIVE_U_H + + #include "hw/net/cadence_gem.h" ++#include "hw/riscv/riscv_hart.h" + + #define TYPE_RISCV_U_SOC "riscv.sifive.u.soc" + #define RISCV_U_SOC(obj) \ +diff --git 
a/include/hw/riscv/sifive_uart.h b/include/hw/riscv/sifive_uart.h +index c8dc1c57fd..65668825a3 100644 +--- a/include/hw/riscv/sifive_uart.h ++++ b/include/hw/riscv/sifive_uart.h +@@ -20,6 +20,9 @@ + #ifndef HW_SIFIVE_UART_H + #define HW_SIFIVE_UART_H + ++#include "chardev/char-fe.h" ++#include "hw/sysbus.h" ++ + enum { + SIFIVE_UART_TXFIFO = 0, + SIFIVE_UART_RXFIFO = 4, +diff --git a/include/hw/riscv/spike.h b/include/hw/riscv/spike.h +index 641b70da67..03d870363c 100644 +--- a/include/hw/riscv/spike.h ++++ b/include/hw/riscv/spike.h +@@ -19,6 +19,9 @@ + #ifndef HW_RISCV_SPIKE_H + #define HW_RISCV_SPIKE_H + ++#include "hw/riscv/riscv_hart.h" ++#include "hw/sysbus.h" ++ + typedef struct { + /*< private >*/ + SysBusDevice parent_obj; +diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h +index d01a1a85c4..6e5fbe5d3b 100644 +--- a/include/hw/riscv/virt.h ++++ b/include/hw/riscv/virt.h +@@ -19,6 +19,9 @@ + #ifndef HW_RISCV_VIRT_H + #define HW_RISCV_VIRT_H + ++#include "hw/riscv/riscv_hart.h" ++#include "hw/sysbus.h" ++ + typedef struct { + /*< private >*/ + SysBusDevice parent_obj; +diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h +index 765e9082a3..8df9cd2954 100644 +--- a/include/hw/s390x/ap-device.h ++++ b/include/hw/s390x/ap-device.h +@@ -7,9 +7,12 @@ + * your option) any later version. See the COPYING file in the top-level + * directory. 
+ */ ++ + #ifndef HW_S390X_AP_DEVICE_H + #define HW_S390X_AP_DEVICE_H + ++#include "hw/qdev-core.h" ++ + #define AP_DEVICE_TYPE "ap-device" + + typedef struct APDevice { +diff --git a/include/hw/s390x/css-bridge.h b/include/hw/s390x/css-bridge.h +index 5a0203be5f..f7ed2d9a03 100644 +--- a/include/hw/s390x/css-bridge.h ++++ b/include/hw/s390x/css-bridge.h +@@ -12,8 +12,9 @@ + + #ifndef HW_S390X_CSS_BRIDGE_H + #define HW_S390X_CSS_BRIDGE_H ++ + #include "qom/object.h" +-#include "hw/qdev-core.h" ++#include "hw/sysbus.h" + + /* virtual css bridge */ + typedef struct VirtualCssBridge { +diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h +index d033387fba..f46bcafb16 100644 +--- a/include/hw/s390x/css.h ++++ b/include/hw/s390x/css.h +@@ -17,6 +17,7 @@ + #include "hw/s390x/s390_flic.h" + #include "hw/s390x/ioinst.h" + #include "sysemu/kvm.h" ++#include "target/s390x/cpu-qom.h" + + /* Channel subsystem constants. */ + #define MAX_DEVNO 65535 +diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h +index 9c4a6000c3..d71f4ea8a7 100644 +--- a/include/hw/s390x/tod.h ++++ b/include/hw/s390x/tod.h +@@ -12,7 +12,7 @@ + #define HW_S390_TOD_H + + #include "hw/qdev.h" +-#include "s390-tod.h" ++#include "target/s390x/s390-tod.h" + + typedef struct S390TOD { + uint8_t high; +diff --git a/include/hw/semihosting/console.h b/include/hw/semihosting/console.h +index cfab572c0c..9be9754bcd 100644 +--- a/include/hw/semihosting/console.h ++++ b/include/hw/semihosting/console.h +@@ -9,6 +9,8 @@ + #ifndef SEMIHOST_CONSOLE_H + #define SEMIHOST_CONSOLE_H + ++#include "cpu.h" ++ + /** + * qemu_semihosting_console_outs: + * @env: CPUArchState +diff --git a/include/hw/sh4/sh_intc.h b/include/hw/sh4/sh_intc.h +index b7c2404334..3d3efde059 100644 +--- a/include/hw/sh4/sh_intc.h ++++ b/include/hw/sh4/sh_intc.h +@@ -1,6 +1,7 @@ + #ifndef SH_INTC_H + #define SH_INTC_H + ++#include "exec/memory.h" + #include "hw/irq.h" + + typedef unsigned char intc_enum; +diff --git 
a/include/hw/sparc/sparc64.h b/include/hw/sparc/sparc64.h +index 21ab79e343..4ced36fb5a 100644 +--- a/include/hw/sparc/sparc64.h ++++ b/include/hw/sparc/sparc64.h +@@ -1,6 +1,8 @@ + #ifndef HW_SPARC_SPARC64_H + #define HW_SPARC_SPARC64_H + ++#include "target/sparc/cpu-qom.h" ++ + #define IVEC_MAX 0x40 + + SPARCCPU *sparc64_cpu_devinit(const char *cpu_type, uint64_t prom_addr); +diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h +index 591279ba1f..aa07dac4fe 100644 +--- a/include/hw/ssi/aspeed_smc.h ++++ b/include/hw/ssi/aspeed_smc.h +@@ -26,6 +26,7 @@ + #define ASPEED_SMC_H + + #include "hw/ssi/ssi.h" ++#include "hw/sysbus.h" + + typedef struct AspeedSegments { + hwaddr addr; +diff --git a/include/hw/ssi/xilinx_spips.h b/include/hw/ssi/xilinx_spips.h +index a0a0ae7584..6a39b55a7b 100644 +--- a/include/hw/ssi/xilinx_spips.h ++++ b/include/hw/ssi/xilinx_spips.h +@@ -28,6 +28,7 @@ + #include "hw/ssi/ssi.h" + #include "qemu/fifo32.h" + #include "hw/stream.h" ++#include "hw/sysbus.h" + + typedef struct XilinxSPIPS XilinxSPIPS; + +diff --git a/include/hw/timer/allwinner-a10-pit.h b/include/hw/timer/allwinner-a10-pit.h +index c0cc3e2169..871c95b512 100644 +--- a/include/hw/timer/allwinner-a10-pit.h ++++ b/include/hw/timer/allwinner-a10-pit.h +@@ -2,6 +2,7 @@ + #define ALLWINNER_A10_PIT_H + + #include "hw/ptimer.h" ++#include "hw/sysbus.h" + + #define TYPE_AW_A10_PIT "allwinner-A10-timer" + #define AW_A10_PIT(obj) OBJECT_CHECK(AwA10PITState, (obj), TYPE_AW_A10_PIT) +diff --git a/include/hw/timer/i8254_internal.h b/include/hw/timer/i8254_internal.h +index c37a438f82..e611c6f227 100644 +--- a/include/hw/timer/i8254_internal.h ++++ b/include/hw/timer/i8254_internal.h +@@ -27,6 +27,7 @@ + + #include "hw/hw.h" + #include "hw/isa/isa.h" ++#include "hw/timer/i8254.h" + #include "qemu/timer.h" + + typedef struct PITChannelState { +diff --git a/include/hw/timer/m48t59.h b/include/hw/timer/m48t59.h +index 43efc91f56..d3fb50e08c 100644 +--- 
a/include/hw/timer/m48t59.h ++++ b/include/hw/timer/m48t59.h +@@ -1,6 +1,8 @@ + #ifndef HW_M48T59_H + #define HW_M48T59_H + ++#include "exec/hwaddr.h" ++#include "hw/irq.h" + #include "qom/object.h" + + #define TYPE_NVRAM "nvram" +diff --git a/include/hw/timer/mc146818rtc_regs.h b/include/hw/timer/mc146818rtc_regs.h +index c62f17bf2d..bfbb57e570 100644 +--- a/include/hw/timer/mc146818rtc_regs.h ++++ b/include/hw/timer/mc146818rtc_regs.h +@@ -25,6 +25,8 @@ + #ifndef MC146818RTC_REGS_H + #define MC146818RTC_REGS_H + ++#include "qemu/timer.h" ++ + #define RTC_ISA_IRQ 8 + + #define RTC_SECONDS 0 +diff --git a/include/hw/timer/xlnx-zynqmp-rtc.h b/include/hw/timer/xlnx-zynqmp-rtc.h +index 6e9134edf6..97e32322ed 100644 +--- a/include/hw/timer/xlnx-zynqmp-rtc.h ++++ b/include/hw/timer/xlnx-zynqmp-rtc.h +@@ -28,6 +28,7 @@ + #define HW_TIMER_XLNX_ZYNQMP_RTC_H + + #include "hw/register.h" ++#include "hw/sysbus.h" + + #define TYPE_XLNX_ZYNQMP_RTC "xlnx-zynmp.rtc" + +diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h +index bdf58f3119..6818a23a2d 100644 +--- a/include/hw/virtio/virtio-access.h ++++ b/include/hw/virtio/virtio-access.h +@@ -16,6 +16,7 @@ + #ifndef QEMU_VIRTIO_ACCESS_H + #define QEMU_VIRTIO_ACCESS_H + ++#include "exec/hwaddr.h" + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-bus.h" + +diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h +index 38d12160f6..203f9e1718 100644 +--- a/include/hw/virtio/virtio-gpu-bswap.h ++++ b/include/hw/virtio/virtio-gpu-bswap.h +@@ -15,6 +15,7 @@ + #define HW_VIRTIO_GPU_BSWAP_H + + #include "qemu/bswap.h" ++#include "standard-headers/linux/virtio_gpu.h" + + static inline void + virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr) +diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h +index 922dce7cac..ff699335e3 100644 +--- a/include/hw/virtio/virtio-rng.h ++++ b/include/hw/virtio/virtio-rng.h +@@ -12,6 +12,7 @@ + #ifndef 
QEMU_VIRTIO_RNG_H + #define QEMU_VIRTIO_RNG_H + ++#include "hw/virtio/virtio.h" + #include "sysemu/rng.h" + #include "sysemu/rng-random.h" + #include "standard-headers/linux/virtio_rng.h" +diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h +index daef0c0e23..8c5691ce20 100644 +--- a/include/hw/watchdog/wdt_aspeed.h ++++ b/include/hw/watchdog/wdt_aspeed.h +@@ -10,6 +10,7 @@ + #ifndef WDT_ASPEED_H + #define WDT_ASPEED_H + ++#include "hw/misc/aspeed_scu.h" + #include "hw/sysbus.h" + + #define TYPE_ASPEED_WDT "aspeed.wdt" +diff --git a/include/libdecnumber/decNumberLocal.h b/include/libdecnumber/decNumberLocal.h +index 12cf1d8b6f..4d53c077f2 100644 +--- a/include/libdecnumber/decNumberLocal.h ++++ b/include/libdecnumber/decNumberLocal.h +@@ -44,6 +44,7 @@ + #define DECNLAUTHOR "Mike Cowlishaw" /* Who to blame */ + + #include "libdecnumber/dconfig.h" ++ #include "libdecnumber/decContext.h" + + /* Conditional code flag -- set this to match hardware platform */ + /* 1=little-endian, 0=big-endian */ +diff --git a/include/migration/cpu.h b/include/migration/cpu.h +index a40bd3549f..da1618d620 100644 +--- a/include/migration/cpu.h ++++ b/include/migration/cpu.h +@@ -1,7 +1,10 @@ + /* Declarations for use for CPU state serialization. 
*/ ++ + #ifndef MIGRATION_CPU_H + #define MIGRATION_CPU_H + ++#include "exec/cpu-defs.h" ++ + #if TARGET_LONG_BITS == 64 + #define qemu_put_betl qemu_put_be64 + #define qemu_get_betl qemu_get_be64 +diff --git a/include/monitor/hmp-target.h b/include/monitor/hmp-target.h +index 454e8ed155..8b7820a3ad 100644 +--- a/include/monitor/hmp-target.h ++++ b/include/monitor/hmp-target.h +@@ -25,6 +25,8 @@ + #ifndef MONITOR_HMP_TARGET_H + #define MONITOR_HMP_TARGET_H + ++#include "cpu.h" ++ + #define MD_TLONG 0 + #define MD_I32 1 + +diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h +index ddd0d55d31..6b34484e15 100644 +--- a/include/qemu/atomic128.h ++++ b/include/qemu/atomic128.h +@@ -13,6 +13,8 @@ + #ifndef QEMU_ATOMIC128_H + #define QEMU_ATOMIC128_H + ++#include "qemu/int128.h" ++ + /* + * GCC is a house divided about supporting large atomic operations. + * +diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h +index 1b38291823..01da8d63f1 100644 +--- a/include/qemu/ratelimit.h ++++ b/include/qemu/ratelimit.h +@@ -14,6 +14,8 @@ + #ifndef QEMU_RATELIMIT_H + #define QEMU_RATELIMIT_H + ++#include "qemu/timer.h" ++ + typedef struct { + int64_t slice_start_time; + int64_t slice_end_time; +diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h +index 50af5dd7ab..d0a1a9597e 100644 +--- a/include/qemu/thread-win32.h ++++ b/include/qemu/thread-win32.h +@@ -47,6 +47,6 @@ struct QemuThread { + }; + + /* Only valid for joinable threads. 
*/ +-HANDLE qemu_thread_get_handle(QemuThread *thread); ++HANDLE qemu_thread_get_handle(struct QemuThread *thread); + + #endif +diff --git a/include/sysemu/balloon.h b/include/sysemu/balloon.h +index c8f6145257..aea0c44985 100644 +--- a/include/sysemu/balloon.h ++++ b/include/sysemu/balloon.h +@@ -14,6 +14,7 @@ + #ifndef QEMU_BALLOON_H + #define QEMU_BALLOON_H + ++#include "exec/cpu-common.h" + #include "qapi/qapi-types-misc.h" + + typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target); +diff --git a/include/sysemu/cryptodev-vhost-user.h b/include/sysemu/cryptodev-vhost-user.h +index 6debf53fc5..0d3421e7e8 100644 +--- a/include/sysemu/cryptodev-vhost-user.h ++++ b/include/sysemu/cryptodev-vhost-user.h +@@ -20,9 +20,12 @@ + * License along with this library; if not, see . + * + */ ++ + #ifndef CRYPTODEV_VHOST_USER_H + #define CRYPTODEV_VHOST_USER_H + ++#include "sysemu/cryptodev-vhost.h" ++ + #define VHOST_USER_MAX_AUTH_KEY_LEN 512 + #define VHOST_USER_MAX_CIPHER_KEY_LEN 64 + +diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h +index d275b5a843..dd1722f2df 100644 +--- a/include/sysemu/hvf.h ++++ b/include/sysemu/hvf.h +@@ -13,6 +13,7 @@ + #ifndef HVF_H + #define HVF_H + ++#include "cpu.h" + #include "qemu/bitops.h" + #include "exec/memory.h" + #include "sysemu/accel.h" +diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h +index 5f6240d5cb..6181486401 100644 +--- a/include/sysemu/iothread.h ++++ b/include/sysemu/iothread.h +@@ -16,6 +16,7 @@ + + #include "block/aio.h" + #include "qemu/thread.h" ++#include "qom/object.h" + + #define TYPE_IOTHREAD "iothread" + +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 31df465fdc..787dbc7770 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -9,6 +9,8 @@ + #ifndef QEMU_KVM_INT_H + #define QEMU_KVM_INT_H + ++#include "exec/cpu-common.h" ++#include "exec/memory.h" + #include "sysemu/sysemu.h" + #include "sysemu/accel.h" + #include "sysemu/kvm.h" +diff 
--git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h +index 58452457ce..1b440df486 100644 +--- a/include/sysemu/memory_mapping.h ++++ b/include/sysemu/memory_mapping.h +@@ -15,6 +15,8 @@ + #define MEMORY_MAPPING_H + + #include "qemu/queue.h" ++#include "exec/cpu-common.h" ++#include "exec/cpu-defs.h" + #include "exec/memory.h" + + typedef struct GuestPhysBlock { +diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h +index a03e2f1878..c8e7c2f6cf 100644 +--- a/include/sysemu/xen-mapcache.h ++++ b/include/sysemu/xen-mapcache.h +@@ -9,6 +9,8 @@ + #ifndef XEN_MAPCACHE_H + #define XEN_MAPCACHE_H + ++#include "exec/cpu-common.h" ++ + typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, + ram_addr_t size); + #ifdef CONFIG_XEN +diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h +index d714127799..58bd3a1ec4 100644 +--- a/include/ui/egl-helpers.h ++++ b/include/ui/egl-helpers.h +@@ -4,6 +4,9 @@ + #include + #include + #include ++#include "qapi/qapi-types-ui.h" ++#include "ui/console.h" ++#include "ui/shader.h" + + extern EGLDisplay *qemu_egl_display; + extern EGLConfig qemu_egl_config; +diff --git a/include/ui/input.h b/include/ui/input.h +index 8c8ccb999f..c86219a1c1 100644 +--- a/include/ui/input.h ++++ b/include/ui/input.h +@@ -2,6 +2,7 @@ + #define INPUT_H + + #include "qapi/qapi-types-ui.h" ++#include "qemu/notify.h" + + #define INPUT_EVENT_MASK_KEY (1< + #include + #include + #include +diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h +index aab251bc4b..e9fba96be9 100644 +--- a/target/hppa/cpu.h ++++ b/target/hppa/cpu.h +@@ -22,7 +22,7 @@ + + #include "cpu-qom.h" + #include "exec/cpu-defs.h" +- ++#include "exec/memory.h" + + /* PA-RISC 1.x processors have a strong memory model. */ + /* ??? 
While we do not yet implement PA-RISC 2.0, those processors have +-- +2.27.0 + diff --git a/intc-gicv3-Add-pre-sizing-capability-to-GICv3.patch b/intc-gicv3-Add-pre-sizing-capability-to-GICv3.patch new file mode 100644 index 0000000000000000000000000000000000000000..30175fb5126a8a9b7138c206365b61c96bcddaf0 --- /dev/null +++ b/intc-gicv3-Add-pre-sizing-capability-to-GICv3.patch @@ -0,0 +1,357 @@ +From 0a75312c069d89be94bcaa688429d8f60a0c528b Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 13:15:35 +0800 +Subject: [PATCH] intc/gicv3: Add pre-sizing capability to GICv3 + +Currently GICv3 supports fixed smp_cpus CPUs, and all CPUs are +present always. Now we want to pre-size GICv3 to support max_cpus +CPUs and not all of them are present always, so some sizing codes +should be concerned. + +GIC irqs, GICR and GICC are pre-created for all possible CPUs at +start, but only smp_cpus CPUs are realized and irqs of smp_cpus CPUs +are connected. + +Other code changes are mainly for arm_gicv3, and we do little about +kvm_arm_gicv3 because KVM will deal with the sizing information properly.
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 17 +++++++++++---- + hw/intc/arm_gicv3.c | 43 +++++++++++++++++++++++++------------- + hw/intc/arm_gicv3_common.c | 23 ++++++++++++++++++-- + hw/intc/arm_gicv3_cpuif.c | 4 ++++ + hw/intc/arm_gicv3_kvm.c | 28 ++++++++++++++++++++++++- + include/hw/arm/virt.h | 3 ++- + 6 files changed, 96 insertions(+), 22 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 55d403bad6..dda22194b5 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -761,14 +761,19 @@ static void create_gic(VirtMachineState *vms) + SysBusDevice *gicbusdev; + const char *gictype; + int type = vms->gic_version, i; ++ /* The max number of CPUs suppored by GIC */ ++ unsigned int num_cpus = ms->smp.cpus; ++ /* The number of CPUs present before boot */ + unsigned int smp_cpus = ms->smp.cpus; + uint32_t nb_redist_regions = 0; + ++ assert(num_cpus >= smp_cpus); ++ + gictype = (type == 3) ? gicv3_class_name() : gic_class_name(); + + vms->gic = qdev_create(NULL, gictype); + qdev_prop_set_uint32(vms->gic, "revision", type); +- qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); ++ qdev_prop_set_uint32(vms->gic, "num-cpu", num_cpus); + /* Note that the num-irq property counts both internal and external + * interrupts; there are always 32 of the former (mandated by GIC spec). 
+ */ +@@ -780,7 +785,7 @@ static void create_gic(VirtMachineState *vms) + if (type == 3) { + uint32_t redist0_capacity = + vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; +- uint32_t redist0_count = MIN(smp_cpus, redist0_capacity); ++ uint32_t redist0_count = MIN(num_cpus, redist0_capacity); + + nb_redist_regions = virt_gicv3_redist_region_count(vms); + +@@ -793,7 +798,7 @@ static void create_gic(VirtMachineState *vms) + vms->memmap[VIRT_HIGH_GIC_REDIST2].size / GICV3_REDIST_SIZE; + + qdev_prop_set_uint32(vms->gic, "redist-region-count[1]", +- MIN(smp_cpus - redist0_count, redist1_capacity)); ++ MIN(num_cpus - redist0_count, redist1_capacity)); + } + } else { + if (!kvm_irqchip_in_kernel()) { +@@ -820,7 +825,11 @@ static void create_gic(VirtMachineState *vms) + + /* Wire the outputs from each CPU's generic timer and the GICv3 + * maintenance interrupt signal to the appropriate GIC PPI inputs, +- * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs. ++ * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's ++ * inputs. ++ * ++ * The irqs of remaining CPUs (if we has) will be connected during ++ * hotplugging. + */ + for (i = 0; i < smp_cpus; i++) { + connect_gic_cpu_irqs(vms, i); +diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c +index cacef26546..a60185113f 100644 +--- a/hw/intc/arm_gicv3.c ++++ b/hw/intc/arm_gicv3.c +@@ -20,6 +20,7 @@ + #include "qemu/module.h" + #include "hw/sysbus.h" + #include "hw/intc/arm_gicv3.h" ++#include "qom/cpu.h" + #include "gicv3_internal.h" + + static bool irqbetter(GICv3CPUState *cs, int irq, uint8_t prio) +@@ -206,7 +207,9 @@ static void gicv3_update_noirqset(GICv3State *s, int start, int len) + assert(len > 0); + + for (i = 0; i < s->num_cpu; i++) { +- s->cpu[i].seenbetter = false; ++ if (qemu_get_cpu(i)) { ++ s->cpu[i].seenbetter = false; ++ } + } + + /* Find the highest priority pending interrupt in this range. 
*/ +@@ -248,16 +251,18 @@ static void gicv3_update_noirqset(GICv3State *s, int start, int len) + * now be the new best one). + */ + for (i = 0; i < s->num_cpu; i++) { +- GICv3CPUState *cs = &s->cpu[i]; ++ if (qemu_get_cpu(i)) { ++ GICv3CPUState *cs = &s->cpu[i]; + +- if (cs->seenbetter) { +- cs->hppi.grp = gicv3_irq_group(cs->gic, cs, cs->hppi.irq); +- } ++ if (cs->seenbetter) { ++ cs->hppi.grp = gicv3_irq_group(cs->gic, cs, cs->hppi.irq); ++ } + +- if (!cs->seenbetter && cs->hppi.prio != 0xff && +- cs->hppi.irq >= start && cs->hppi.irq < start + len) { +- gicv3_full_update_noirqset(s); +- break; ++ if (!cs->seenbetter && cs->hppi.prio != 0xff && ++ cs->hppi.irq >= start && cs->hppi.irq < start + len) { ++ gicv3_full_update_noirqset(s); ++ break; ++ } + } + } + } +@@ -268,7 +273,9 @@ void gicv3_update(GICv3State *s, int start, int len) + + gicv3_update_noirqset(s, start, len); + for (i = 0; i < s->num_cpu; i++) { +- gicv3_cpuif_update(&s->cpu[i]); ++ if (qemu_get_cpu(i)) { ++ gicv3_cpuif_update(&s->cpu[i]); ++ } + } + } + +@@ -280,7 +287,9 @@ void gicv3_full_update_noirqset(GICv3State *s) + int i; + + for (i = 0; i < s->num_cpu; i++) { +- s->cpu[i].hppi.prio = 0xff; ++ if (qemu_get_cpu(i)) { ++ s->cpu[i].hppi.prio = 0xff; ++ } + } + + /* Note that we can guarantee that these functions will not +@@ -291,7 +300,9 @@ void gicv3_full_update_noirqset(GICv3State *s) + gicv3_update_noirqset(s, GIC_INTERNAL, s->num_irq - GIC_INTERNAL); + + for (i = 0; i < s->num_cpu; i++) { +- gicv3_redist_update_noirqset(&s->cpu[i]); ++ if (qemu_get_cpu(i)) { ++ gicv3_redist_update_noirqset(&s->cpu[i]); ++ } + } + } + +@@ -304,7 +315,9 @@ void gicv3_full_update(GICv3State *s) + + gicv3_full_update_noirqset(s); + for (i = 0; i < s->num_cpu; i++) { +- gicv3_cpuif_update(&s->cpu[i]); ++ if (qemu_get_cpu(i)) { ++ gicv3_cpuif_update(&s->cpu[i]); ++ } + } + } + +@@ -401,7 +414,9 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) + } + + for (i = 0; i < s->num_cpu; i++) { +- 
gicv3_cpu_realize(s, i); ++ if (qemu_get_cpu(i)) { ++ gicv3_cpu_realize(s, i); ++ } + } + } + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index 8740a52c9f..913bf068be 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -24,10 +24,12 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "qemu/module.h" ++#include "qemu/error-report.h" + #include "qom/cpu.h" + #include "hw/intc/arm_gicv3_common.h" + #include "gicv3_internal.h" + #include "hw/arm/linux-boot-if.h" ++#include "hw/boards.h" + #include "sysemu/kvm.h" + + +@@ -363,10 +365,15 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + for (i = 0; i < s->num_cpu; i++) { + CPUState *cpu = qemu_get_cpu(i); + ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ MachineClass *mc = MACHINE_GET_CLASS(ms); ++ const CPUArchIdList *possible_cpus = NULL; + uint64_t cpu_affid; + int last; + +- arm_gicv3_common_cpu_realize(s, i); ++ if (cpu) { ++ arm_gicv3_common_cpu_realize(s, i); ++ } + + /* Pre-construct the GICR_TYPER: + * For our implementation: +@@ -380,7 +387,19 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + * VLPIS == 0 (virtual LPIs not supported) + * PLPIS == 0 (physical LPIs not supported) + */ +- cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); ++ if (cpu) { ++ cpu_affid = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); ++ } else { ++ if (!mc->possible_cpu_arch_ids) { ++ error_report("MachineClass must implement possible_cpu_arch_ids " ++ "hook to support pre-sizing GICv3"); ++ exit(1); ++ } ++ ++ possible_cpus = mc->possible_cpu_arch_ids(ms); ++ cpu_affid = possible_cpus->cpus[i].arch_id; ++ } ++ + last = (i == s->num_cpu - 1); + + /* The CPU mp-affinity property is in MPIDR register format; squash +diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c +index 56aa5efede..a20aa693ea 100644 +--- a/hw/intc/arm_gicv3_cpuif.c ++++ b/hw/intc/arm_gicv3_cpuif.c 
+@@ -1648,6 +1648,10 @@ static void icc_generate_sgi(CPUARMState *env, GICv3CPUState *cs, + aff, targetlist); + + for (i = 0; i < s->num_cpu; i++) { ++ if (!qemu_get_cpu(i)) { ++ continue; ++ } ++ + GICv3CPUState *ocs = &s->cpu[i]; + + if (irm) { +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index f8d7be5479..8eea7c9dd9 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -341,6 +341,10 @@ static void kvm_arm_gicv3_put(GICv3State *s) + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + ++ if (!qemu_get_cpu(ncpu)) { ++ continue; ++ } ++ + reg64 = c->gicr_propbaser; + regl = (uint32_t)reg64; + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, true); +@@ -366,6 +370,10 @@ static void kvm_arm_gicv3_put(GICv3State *s) + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { + GICv3CPUState *c = &s->cpu[ncpu]; + ++ if (!qemu_get_cpu(ncpu)) { ++ continue; ++ } ++ + reg = c->gicr_ctlr; + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, true); + +@@ -462,6 +470,10 @@ static void kvm_arm_gicv3_put(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ if (!qemu_get_cpu(ncpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); +@@ -525,6 +537,10 @@ static void kvm_arm_gicv3_get(GICv3State *s) + /* Redistributor state (one per CPU) */ + + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { ++ if (!qemu_get_cpu(ncpu)) { ++ continue; ++ } ++ + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_CTLR, ncpu, ®, false); +@@ -560,6 +576,10 @@ static void kvm_arm_gicv3_get(GICv3State *s) + + if (redist_typer & GICR_TYPER_PLPIS) { + for (ncpu = 0; ncpu < s->num_cpu; ncpu++) { ++ if (!qemu_get_cpu(ncpu)) { ++ continue; ++ } ++ + GICv3CPUState *c = &s->cpu[ncpu]; + + kvm_gicr_access(s, GICR_PROPBASER, ncpu, ®l, false); +@@ -613,6 +633,10 @@ static void kvm_arm_gicv3_get(GICv3State *s) + */ + + for (ncpu = 0; 
ncpu < s->num_cpu; ncpu++) { ++ if (!qemu_get_cpu(ncpu)) { ++ continue; ++ } ++ + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + +@@ -806,7 +830,9 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + } + + for (i = 0; i < s->num_cpu; i++) { +- kvm_arm_gicv3_cpu_realize(s, i); ++ if (qemu_get_cpu(i)) { ++ kvm_arm_gicv3_cpu_realize(s, i); ++ } + } + + /* Try to create the device via the device control API */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 6880ebe07c..beef4c8002 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -168,8 +168,9 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + vms->memmap[VIRT_GIC_REDIST].size / GICV3_REDIST_SIZE; + + assert(vms->gic_version == 3); ++ GICv3State *s = ARM_GICV3_COMMON(vms->gic); + +- return vms->smp_cpus > redist0_capacity ? 2 : 1; ++ return s->num_cpu > redist0_capacity ? 2 : 1; + } + + #endif /* QEMU_ARM_VIRT_H */ +-- +2.19.1 diff --git a/intc-gicv3_common-Factor-out-arm_gicv3_common_cpu_re.patch b/intc-gicv3_common-Factor-out-arm_gicv3_common_cpu_re.patch new file mode 100644 index 0000000000000000000000000000000000000000..5232d3f2aeda4d3a8a83a725173eaff72d2af8d5 --- /dev/null +++ b/intc-gicv3_common-Factor-out-arm_gicv3_common_cpu_re.patch @@ -0,0 +1,50 @@ +From a7391f391336024986a5997e3beae8882c983ed0 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 12:55:17 +0800 +Subject: [PATCH] intc/gicv3_common: Factor out arm_gicv3_common_cpu_realize + +The CPU object of hotplugged CPU will be defer-created (during +hotplug session), so we must factor out realization code to let +it can be applied to individual CPU. 
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/intc/arm_gicv3_common.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index 5edabb928f..798f295d7c 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -303,6 +303,16 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + } + } + ++static void arm_gicv3_common_cpu_realize(GICv3State *s, int ncpu) ++{ ++ CPUState *cpu = qemu_get_cpu(ncpu); ++ ++ s->cpu[ncpu].cpu = cpu; ++ s->cpu[ncpu].gic = s; ++ /* Store GICv3CPUState in CPUARMState gicv3state pointer */ ++ gicv3_set_gicv3state(cpu, &s->cpu[ncpu]); ++} ++ + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + { + GICv3State *s = ARM_GICV3_COMMON(dev); +@@ -350,10 +360,7 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + uint64_t cpu_affid; + int last; + +- s->cpu[i].cpu = cpu; +- s->cpu[i].gic = s; +- /* Store GICv3CPUState in CPUARMState gicv3state pointer */ +- gicv3_set_gicv3state(cpu, &s->cpu[i]); ++ arm_gicv3_common_cpu_realize(s, i); + + /* Pre-construct the GICR_TYPER: + * For our implementation: +-- +2.19.1 diff --git a/intc-gicv3_cpuif-Factor-out-gicv3_init_one_cpuif.patch b/intc-gicv3_cpuif-Factor-out-gicv3_init_one_cpuif.patch new file mode 100644 index 0000000000000000000000000000000000000000..95c60b02c66e48b2fbe6d64c5e139aa3ecdcfae9 --- /dev/null +++ b/intc-gicv3_cpuif-Factor-out-gicv3_init_one_cpuif.patch @@ -0,0 +1,197 @@ +From de97ff4a01008ad98f7d69adc4b84843fff3ce19 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 10:59:55 +0800 +Subject: [PATCH] intc/gicv3_cpuif: Factor out gicv3_init_one_cpuif + +The CPU object of hotplugged CPU will be defer-created (during +hotplug session), so we must factor out some code to let it can +be applied to individual CPU. 
+ +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/intc/arm_gicv3.c | 5 +- + hw/intc/arm_gicv3_cpuif.c | 122 ++++++++++++++++++-------------------- + hw/intc/gicv3_internal.h | 2 +- + 3 files changed, 64 insertions(+), 65 deletions(-) + +diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c +index 66eaa97198..2fe79f794d 100644 +--- a/hw/intc/arm_gicv3.c ++++ b/hw/intc/arm_gicv3.c +@@ -367,6 +367,7 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) + GICv3State *s = ARM_GICV3(dev); + ARMGICv3Class *agc = ARM_GICV3_GET_CLASS(s); + Error *local_err = NULL; ++ int i; + + agc->parent_realize(dev, &local_err); + if (local_err) { +@@ -386,7 +387,9 @@ static void arm_gic_realize(DeviceState *dev, Error **errp) + return; + } + +- gicv3_init_cpuif(s); ++ for (i = 0; i < s->num_cpu; i++) { ++ gicv3_init_one_cpuif(s, i); ++ } + } + + static void arm_gicv3_class_init(ObjectClass *klass, void *data) +diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c +index 3b212d91c8..56aa5efede 100644 +--- a/hw/intc/arm_gicv3_cpuif.c ++++ b/hw/intc/arm_gicv3_cpuif.c +@@ -2597,78 +2597,74 @@ static void gicv3_cpuif_el_change_hook(ARMCPU *cpu, void *opaque) + gicv3_cpuif_update(cs); + } + +-void gicv3_init_cpuif(GICv3State *s) ++void gicv3_init_one_cpuif(GICv3State *s, int ncpu) + { + /* Called from the GICv3 realize function; register our system + * registers with the CPU + */ +- int i; +- +- for (i = 0; i < s->num_cpu; i++) { +- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- GICv3CPUState *cs = &s->cpu[i]; +- +- /* Note that we can't just use the GICv3CPUState as an opaque pointer +- * in define_arm_cp_regs_with_opaque(), because when we're called back +- * it might be with code translated by CPU 0 but run by CPU 1, in +- * which case we'd get the wrong value. +- * So instead we define the regs with no ri->opaque info, and +- * get back to the GICv3CPUState from the CPUARMState. 
+- */ +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); +- if (arm_feature(&cpu->env, ARM_FEATURE_EL2) +- && cpu->gic_num_lrs) { +- int j; ++ ARMCPU *cpu = ARM_CPU(qemu_get_cpu(ncpu)); ++ GICv3CPUState *cs = &s->cpu[ncpu]; ++ ++ /* Note that we can't just use the GICv3CPUState as an opaque pointer ++ * in define_arm_cp_regs_with_opaque(), because when we're called back ++ * it might be with code translated by CPU 0 but run by CPU 1, in ++ * which case we'd get the wrong value. ++ * So instead we define the regs with no ri->opaque info, and ++ * get back to the GICv3CPUState from the CPUARMState. ++ */ ++ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ if (arm_feature(&cpu->env, ARM_FEATURE_EL2) ++ && cpu->gic_num_lrs) { ++ int j; + +- cs->maintenance_irq = cpu->gicv3_maintenance_interrupt; ++ cs->maintenance_irq = cpu->gicv3_maintenance_interrupt; + +- cs->num_list_regs = cpu->gic_num_lrs; +- cs->vpribits = cpu->gic_vpribits; +- cs->vprebits = cpu->gic_vprebits; ++ cs->num_list_regs = cpu->gic_num_lrs; ++ cs->vpribits = cpu->gic_vpribits; ++ cs->vprebits = cpu->gic_vprebits; + +- /* Check against architectural constraints: getting these +- * wrong would be a bug in the CPU code defining these, +- * and the implementation relies on them holding. +- */ +- g_assert(cs->vprebits <= cs->vpribits); +- g_assert(cs->vprebits >= 5 && cs->vprebits <= 7); +- g_assert(cs->vpribits >= 5 && cs->vpribits <= 8); ++ /* Check against architectural constraints: getting these ++ * wrong would be a bug in the CPU code defining these, ++ * and the implementation relies on them holding. 
++ */ ++ g_assert(cs->vprebits <= cs->vpribits); ++ g_assert(cs->vprebits >= 5 && cs->vprebits <= 7); ++ g_assert(cs->vpribits >= 5 && cs->vpribits <= 8); + +- define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); ++ define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); + +- for (j = 0; j < cs->num_list_regs; j++) { +- /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs +- * are split into two cp15 regs, LR (the low part, with the +- * same encoding as the AArch64 LR) and LRC (the high part). +- */ +- ARMCPRegInfo lr_regset[] = { +- { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, +- .opc0 = 3, .opc1 = 4, .crn = 12, +- .crm = 12 + (j >> 3), .opc2 = j & 7, +- .type = ARM_CP_IO | ARM_CP_NO_RAW, +- .access = PL2_RW, +- .readfn = ich_lr_read, +- .writefn = ich_lr_write, +- }, +- { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, +- .cp = 15, .opc1 = 4, .crn = 12, +- .crm = 14 + (j >> 3), .opc2 = j & 7, +- .type = ARM_CP_IO | ARM_CP_NO_RAW, +- .access = PL2_RW, +- .readfn = ich_lr_read, +- .writefn = ich_lr_write, +- }, +- REGINFO_SENTINEL +- }; +- define_arm_cp_regs(cpu, lr_regset); +- } +- if (cs->vprebits >= 6) { +- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); +- } +- if (cs->vprebits == 7) { +- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); +- } ++ for (j = 0; j < cs->num_list_regs; j++) { ++ /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs ++ * are split into two cp15 regs, LR (the low part, with the ++ * same encoding as the AArch64 LR) and LRC (the high part). 
++ */ ++ ARMCPRegInfo lr_regset[] = { ++ { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, ++ .opc0 = 3, .opc1 = 4, .crn = 12, ++ .crm = 12 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, ++ .cp = 15, .opc1 = 4, .crn = 12, ++ .crm = 14 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ REGINFO_SENTINEL ++ }; ++ define_arm_cp_regs(cpu, lr_regset); ++ } ++ if (cs->vprebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); ++ } ++ if (cs->vprebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); + } +- arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs); + } ++ arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs); + } +diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h +index 05303a55c8..cfbfe8a549 100644 +--- a/hw/intc/gicv3_internal.h ++++ b/hw/intc/gicv3_internal.h +@@ -297,7 +297,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, + void gicv3_dist_set_irq(GICv3State *s, int irq, int level); + void gicv3_redist_set_irq(GICv3CPUState *cs, int irq, int level); + void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); +-void gicv3_init_cpuif(GICv3State *s); ++void gicv3_init_one_cpuif(GICv3State *s, int ncpu); + + /** + * gicv3_cpuif_update: +-- +2.19.1 diff --git a/intc-kvm_gicv3-Factor-out-kvm_arm_gicv3_cpu_realize.patch b/intc-kvm_gicv3-Factor-out-kvm_arm_gicv3_cpu_realize.patch new file mode 100644 index 0000000000000000000000000000000000000000..6af9a8f4f55fa4ce936c9d5898cd5c232abcaa9a --- /dev/null +++ b/intc-kvm_gicv3-Factor-out-kvm_arm_gicv3_cpu_realize.patch @@ -0,0 +1,45 @@ +From f45964c7e0df4ef17457a9ea92bfd255064139e1 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Fri, 10 Apr 2020 
12:49:12 +0800 +Subject: [PATCH] intc/kvm_gicv3: Factor out kvm_arm_gicv3_cpu_realize + +The CPU object of hotplugged CPU will be defer-created (during +hotplug session), so we must factor out realization code to let +it can be applied to individual CPU. + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/intc/arm_gicv3_kvm.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index b1e74147ba..b2936938cb 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -761,6 +761,12 @@ static void vm_change_state_handler(void *opaque, int running, + } + } + ++static void kvm_arm_gicv3_cpu_realize(GICv3State *s, int ncpu) ++{ ++ ARMCPU *cpu = ARM_CPU(qemu_get_cpu(ncpu)); ++ ++ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++} + + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + { +@@ -791,9 +797,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + } + + for (i = 0; i < s->num_cpu; i++) { +- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ kvm_arm_gicv3_cpu_realize(s, i); + } + + /* Try to create the device via the device control API */ +-- +2.19.1 diff --git a/io-Don-t-use-flag-of-printf-format.patch b/io-Don-t-use-flag-of-printf-format.patch new file mode 100644 index 0000000000000000000000000000000000000000..61f3b71dda336dbc2c833b323880f7dd3be09dec --- /dev/null +++ b/io-Don-t-use-flag-of-printf-format.patch @@ -0,0 +1,32 @@ +From 0aa003cd0e117cb160da7d4b6e50630bf2fedfd6 Mon Sep 17 00:00:00 2001 +From: AlexChen +Date: Mon, 19 Oct 2020 20:12:02 +0800 +Subject: [PATCH] io: Don't use '#' flag of printf format +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: AlexChen +Signed-off-by: Daniel P. 
Berrangé +(cherry-picked from commit 77b7829e75) +--- + io/channel-websock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/io/channel-websock.c b/io/channel-websock.c +index fc36d44eba..d48a929e49 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -734,7 +734,7 @@ static int qio_channel_websock_decode_header(QIOChannelWebsock *ioc, + opcode != QIO_CHANNEL_WEBSOCK_OPCODE_CLOSE && + opcode != QIO_CHANNEL_WEBSOCK_OPCODE_PING && + opcode != QIO_CHANNEL_WEBSOCK_OPCODE_PONG) { +- error_setg(errp, "unsupported opcode: %#04x; only binary, close, " ++ error_setg(errp, "unsupported opcode: 0x%04x; only binary, close, " + "ping, and pong websocket frames are supported", opcode); + qio_channel_websock_write_close( + ioc, QIO_CHANNEL_WEBSOCK_STATUS_INVALID_DATA , +-- +2.27.0 + diff --git a/iommu-Introduce-generic-header.patch b/iommu-Introduce-generic-header.patch new file mode 100644 index 0000000000000000000000000000000000000000..76e0c0c80ff83bfd8a5f0130ca73c0623e0efc35 --- /dev/null +++ b/iommu-Introduce-generic-header.patch @@ -0,0 +1,53 @@ +From e8055075dbbc932afccc1f18f4acc093fe9e4dc3 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 9 Jul 2019 12:20:12 +0200 +Subject: [PATCH] iommu: Introduce generic header + +This header is meant to expose data types used by +several IOMMU devices such as struct for SVA and +nested stage configuration. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + include/hw/iommu/iommu.h | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + create mode 100644 include/hw/iommu/iommu.h + +diff --git a/include/hw/iommu/iommu.h b/include/hw/iommu/iommu.h +new file mode 100644 +index 0000000000..12092bda7b +--- /dev/null ++++ b/include/hw/iommu/iommu.h +@@ -0,0 +1,28 @@ ++/* ++ * common header for iommu devices ++ * ++ * Copyright Red Hat, Inc. 2019 ++ * ++ * Authors: ++ * Eric Auger ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2.
See ++ * the COPYING file in the top-level directory. ++ */ ++ ++#ifndef QEMU_HW_IOMMU_IOMMU_H ++#define QEMU_HW_IOMMU_IOMMU_H ++#ifdef __linux__ ++#include ++#endif ++ ++typedef struct IOMMUConfig { ++ union { ++#ifdef __linux__ ++ struct iommu_pasid_table_config pasid_cfg; ++#endif ++ }; ++} IOMMUConfig; ++ ++ ++#endif /* QEMU_HW_IOMMU_IOMMU_H */ +-- +2.27.0 + diff --git a/iotests-143-Create-socket-in-SOCK_DIR.patch b/iotests-143-Create-socket-in-SOCK_DIR.patch new file mode 100644 index 0000000000000000000000000000000000000000..31d6a8421e46d181deb1e7c6792f78546a3d873e --- /dev/null +++ b/iotests-143-Create-socket-in-SOCK_DIR.patch @@ -0,0 +1,59 @@ +From 2e8fecd9e963c740cfe73d0de4491541423e185f Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Thu, 17 Oct 2019 15:31:40 +0200 +Subject: [PATCH] iotests/143: Create socket in $SOCK_DIR + +Signed-off-by: Max Reitz +Reviewed-by: Eric Blake +Reviewed-by: Thomas Huth +Message-id: 20191017133155.5327-9-mreitz@redhat.com +Signed-off-by: Max Reitz +--- + tests/qemu-iotests/143 | 6 +++--- + tests/qemu-iotests/143.out | 2 +- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 +index 92249ac8da..f649b36195 100755 +--- a/tests/qemu-iotests/143 ++++ b/tests/qemu-iotests/143 +@@ -29,7 +29,7 @@ status=1 # failure is the default! 
+ _cleanup() + { + _cleanup_qemu +- rm -f "$TEST_DIR/nbd" ++ rm -f "$SOCK_DIR/nbd" + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + +@@ -51,12 +51,12 @@ _send_qemu_cmd $QEMU_HANDLE \ + _send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'nbd-server-start', + 'arguments': { 'addr': { 'type': 'unix', +- 'data': { 'path': '$TEST_DIR/nbd' }}}}" \ ++ 'data': { 'path': '$SOCK_DIR/nbd' }}}}" \ + 'return' + + # This should just result in a client error, not in the server crashing + $QEMU_IO_PROG -f raw -c quit \ +- "nbd+unix:///no_such_export?socket=$TEST_DIR/nbd" 2>&1 \ ++ "nbd+unix:///no_such_export?socket=$SOCK_DIR/nbd" 2>&1 \ + | _filter_qemu_io | _filter_nbd + + _send_qemu_cmd $QEMU_HANDLE \ +diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out +index ee71b5aa42..037d34a409 100644 +--- a/tests/qemu-iotests/143.out ++++ b/tests/qemu-iotests/143.out +@@ -1,7 +1,7 @@ + QA output created by 143 + {"return": {}} + {"return": {}} +-qemu-io: can't open device nbd+unix:///no_such_export?socket=TEST_DIR/nbd: Requested export not available ++qemu-io: can't open device nbd+unix:///no_such_export?socket=SOCK_DIR/nbd: Requested export not available + server reported: export 'no_such_export' not present + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} +-- +2.27.0 + diff --git a/ip_reass-Fix-use-after-free.patch b/ip_reass-Fix-use-after-free.patch new file mode 100644 index 0000000000000000000000000000000000000000..b26e8afb629d7d768608fdc471a9cf754be36f7e --- /dev/null +++ b/ip_reass-Fix-use-after-free.patch @@ -0,0 +1,40 @@ +From 63b07dfe20a0d4971b0929d27359f478ba2d816b Mon Sep 17 00:00:00 2001 +From: Samuel Thibault +Date: Fri, 22 May 2020 10:52:55 +0800 +Subject: [PATCH] ip_reass: Fix use after free + +Using ip_deq after m_free might read pointers from an allocation reuse. 
+ +This would be difficult to exploit, but that is still related with +CVE-2019-14378 which generates fragmented IP packets that would trigger this +issue and at least produce a DoS. +Signed-off-by: Samuel Thibault + +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +index 8c75d91..c07d7d4 100644 +--- a/slirp/src/ip_input.c ++++ b/slirp/src/ip_input.c +@@ -292,6 +292,7 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + */ + while (q != (struct ipasfrag *)&fp->frag_link && + ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; + i = (ip->ip_off + ip->ip_len) - q->ipf_off; + if (i < q->ipf_len) { + q->ipf_len -= i; +@@ -299,9 +300,10 @@ static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) + m_adj(dtom(slirp, q), i); + break; + } ++ prev = q; + q = q->ipf_next; +- m_free(dtom(slirp, q->ipf_prev)); +- ip_deq(q->ipf_prev); ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); + } + + insert: +-- +1.8.3.1 + diff --git a/json-Fix-a-memleak-in-parse_pair.patch b/json-Fix-a-memleak-in-parse_pair.patch new file mode 100644 index 0000000000000000000000000000000000000000..c39776e6160b48f0f5bd1834899a0d186b03eeb7 --- /dev/null +++ b/json-Fix-a-memleak-in-parse_pair.patch @@ -0,0 +1,116 @@ +From 503d231e06c159c1530a76b1740b3ec7e47619e5 Mon Sep 17 00:00:00 2001 +From: Alex Chen +Date: Fri, 13 Nov 2020 14:55:25 +0000 +Subject: [PATCH] json: Fix a memleak in parse_pair() + +In qobject_type(), NULL is returned when the 'QObject' returned from parse_value() is not of QString type, +and this 'QObject' memory will be leaked. +So we need to first cache the 'QObject' returned from parse_value(), and finally +free 'QObject' memory at the end of the function. +Also, we add a test case about invalid dict key.
+ +The memleak stack is as follows: +Direct leak of 32 byte(s) in 1 object(s) allocated from: + #0 0xfffe4b3c34fb in __interceptor_malloc (/lib64/libasan.so.4+0xd34fb) + #1 0xfffe4ae48aa3 in g_malloc (/lib64/libglib-2.0.so.0+0x58aa3) + #2 0xaaab3557d9f7 in qnum_from_int qemu/qobject/qnum.c:25 + #3 0xaaab35584d23 in parse_literal qemu/qobject/json-parser.c:511 + #4 0xaaab35584d23 in parse_value qemu/qobject/json-parser.c:554 + #5 0xaaab35583d77 in parse_pair qemu/qobject/json-parser.c:270 + #6 0xaaab355845db in parse_object qemu/qobject/json-parser.c:327 + #7 0xaaab355845db in parse_value qemu/qobject/json-parser.c:546 + #8 0xaaab35585b1b in json_parser_parse qemu/qobject/json-parser.c:580 + #9 0xaaab35583703 in json_message_process_token qemu/qobject/json-streamer.c:92 + #10 0xaaab355ddccf in json_lexer_feed_char qemu/qobject/json-lexer.c:313 + #11 0xaaab355de0eb in json_lexer_feed qemu/qobject/json-lexer.c:350 + #12 0xaaab354aff67 in tcp_chr_read qemu/chardev/char-socket.c:525 + #13 0xfffe4ae429db in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x529db) + #14 0xfffe4ae42d8f (/lib64/libglib-2.0.so.0+0x52d8f) + #15 0xfffe4ae430df in g_main_loop_run (/lib64/libglib-2.0.so.0+0x530df) + #16 0xaaab34d70bff in iothread_run qemu/iothread.c:82 + #17 0xaaab3559d71b in qemu_thread_start qemu/util/qemu-thread-posix.c:519 + +Fixes: 532fb5328473 ("qapi: Make more of qobject_to()") +Reported-by: Euler Robot +Signed-off-by: Alex Chen +Signed-off-by: Chen Qun +Signed-off-by: Markus Armbruster +Message-Id: <20201113145525.85151-1-alex.chen@huawei.com> +[Commit message tweaked] +(cherry-picked from commit 922d42bb) +--- + qobject/json-parser.c | 12 ++++++------ + tests/check-qjson.c | 9 +++++++++ + 2 files changed, 15 insertions(+), 6 deletions(-) + +diff --git a/qobject/json-parser.c b/qobject/json-parser.c +index 7d23e12e33..840909ea6a 100644 +--- a/qobject/json-parser.c ++++ b/qobject/json-parser.c +@@ -257,8 +257,9 @@ static JSONToken
*parser_context_peek_token(JSONParserContext *ctxt) + */ + static int parse_pair(JSONParserContext *ctxt, QDict *dict) + { ++ QObject *key_obj = NULL; ++ QString *key; + QObject *value; +- QString *key = NULL; + JSONToken *peek, *token; + + peek = parser_context_peek_token(ctxt); +@@ -267,7 +268,8 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict) + goto out; + } + +- key = qobject_to(QString, parse_value(ctxt)); ++ key_obj = parse_value(ctxt); ++ key = qobject_to(QString, key_obj); + if (!key) { + parse_error(ctxt, peek, "key is not a string in object"); + goto out; +@@ -297,13 +299,11 @@ static int parse_pair(JSONParserContext *ctxt, QDict *dict) + + qdict_put_obj(dict, qstring_get_str(key), value); + +- qobject_unref(key); +- ++ qobject_unref(key_obj); + return 0; + + out: +- qobject_unref(key); +- ++ qobject_unref(key_obj); + return -1; + } + +diff --git a/tests/check-qjson.c b/tests/check-qjson.c +index fa2afccb0a..5e3e08fe79 100644 +--- a/tests/check-qjson.c ++++ b/tests/check-qjson.c +@@ -1415,6 +1415,14 @@ static void invalid_dict_comma(void) + g_assert(obj == NULL); + } + ++static void invalid_dict_key(void) ++{ ++ Error *err = NULL; ++ QObject *obj = qobject_from_json("{32:'abc'}", &err); ++ error_free_or_abort(&err); ++ g_assert(obj == NULL); ++} ++ + static void unterminated_literal(void) + { + Error *err = NULL; +@@ -1500,6 +1508,7 @@ int main(int argc, char **argv) + g_test_add_func("/errors/unterminated/dict_comma", unterminated_dict_comma); + g_test_add_func("/errors/invalid_array_comma", invalid_array_comma); + g_test_add_func("/errors/invalid_dict_comma", invalid_dict_comma); ++ g_test_add_func("/errors/invalid_dict_key", invalid_dict_key); + g_test_add_func("/errors/unterminated/literal", unterminated_literal); + g_test_add_func("/errors/limits/nesting", limits_nesting); + g_test_add_func("/errors/multiple_values", multiple_values); +-- +2.27.0 + diff --git a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch 
b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch new file mode 100644 index 0000000000000000000000000000000000000000..dfa8bf6a01201096881ec49e34ddf0ed18eec84f --- /dev/null +++ b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch @@ -0,0 +1,99 @@ +From ccfc5c99103e2f633084c906197075392f625a80 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 21 Nov 2019 16:56:45 +0000 +Subject: [PATCH] kvm: Reallocate dirty_bmap when we change a slot + +kvm_set_phys_mem can be called to reallocate a slot by something the +guest does (e.g. writing to PAM and other chipset registers). +This can happen in the middle of a migration, and if we're unlucky +it can now happen between the split 'sync' and 'clear'; the clear +asserts if there's no bmap to clear. Recreate the bmap whenever +we change the slot, keeping the clear path happy. + +Typically this is triggered by the guest rebooting during a migrate. + +Corresponds to: +https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +https://bugzilla.redhat.com/show_bug.cgi?id=1771032 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Peter Xu +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- + 1 file changed, 29 insertions(+), 15 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6828f6a1f9..5a6b89cc2a 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -536,6 +536,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, + + #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) + ++/* Allocate the dirty bitmap for a slot */ ++static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) ++{ ++ /* ++ * XXX bad kernel interface alert ++ * For dirty bitmap, kernel allocates array of size aligned to ++ * bits-per-long. But for case when the kernel is 64bits and ++ * the userspace is 32bits, userspace can't align to the same ++ * bits-per-long, since sizeof(long) is different between kernel ++ * and user space. 
This way, userspace will provide buffer which ++ * may be 4 bytes less than the kernel will use, resulting in ++ * userspace memory corruption (which is not detectable by valgrind ++ * too, in most cases). ++ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in ++ * a hope that sizeof(long) won't become >8 any time soon. ++ */ ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; ++ mem->dirty_bmap = g_malloc0(bitmap_size); ++} ++ + /** + * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space + * +@@ -568,23 +589,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- /* XXX bad kernel interface alert +- * For dirty bitmap, kernel allocates array of size aligned to +- * bits-per-long. But for case when the kernel is 64bits and +- * the userspace is 32bits, userspace can't align to the same +- * bits-per-long, since sizeof(long) is different between kernel +- * and user space. This way, userspace will provide buffer which +- * may be 4 bytes less than the kernel will use, resulting in +- * userspace memory corruption (which is not detectable by valgrind +- * too, in most cases). +- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in +- * a hope that sizeof(long) won't become >8 any time soon. +- */ + if (!mem->dirty_bmap) { +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(bitmap_size); ++ kvm_memslot_init_dirty_bitmap(mem); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -1066,6 +1073,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); + ++ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ /* ++ * Reallocate the bmap; it means it doesn't disappear in ++ * middle of a migrate. 
++ */ ++ kvm_memslot_init_dirty_bitmap(mem); ++ } + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { + fprintf(stderr, "%s: error registering slot: %s\n", __func__, +-- +2.27.0 + diff --git a/kvm-split-too-big-memory-section-on-several-memslots.patch b/kvm-split-too-big-memory-section-on-several-memslots.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a94e21a773498e07764996501664313b9c98522 --- /dev/null +++ b/kvm-split-too-big-memory-section-on-several-memslots.patch @@ -0,0 +1,246 @@ +From 33f5a810b0edc1ac67163f396bd345e04b5c11e8 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 24 Sep 2019 10:47:50 -0400 +Subject: [PATCH] kvm: split too big memory section on several memslots + +Max memslot size supported by kvm on s390 is 8Tb, +move logic of splitting RAM in chunks upto 8T to KVM code. + +This way it will hide KVM specific restrictions in KVM code +and won't affect board level design decisions. Which would allow +us to avoid misusing memory_region_allocate_system_memory() API +and eventually use a single hostmem backend for guest RAM. 
+ +Signed-off-by: Igor Mammedov +Message-Id: <20190924144751.24149-4-imammedo@redhat.com> +Reviewed-by: Peter Xu +Acked-by: Paolo Bonzini +Signed-off-by: Christian Borntraeger +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 124 +++++++++++++++++++++++++-------------- + include/sysemu/kvm_int.h | 1 + + 2 files changed, 81 insertions(+), 44 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 84edbe8bb1..6828f6a1f9 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed; + bool kvm_ioeventfd_any_length_allowed; + bool kvm_msi_use_devid; + static bool kvm_immediate_exit; ++static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { + KVM_CAP_INFO(USER_MEMORY), +@@ -458,7 +459,7 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, + static int kvm_section_update_flags(KVMMemoryListener *kml, + MemoryRegionSection *section) + { +- hwaddr start_addr, size; ++ hwaddr start_addr, size, slot_size; + KVMSlot *mem; + int ret = 0; + +@@ -469,13 +470,18 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, + + kvm_slots_lock(kml); + +- mem = kvm_lookup_matching_slot(kml, start_addr, size); +- if (!mem) { +- /* We don't have a slot if we want to trap every access. */ +- goto out; +- } ++ while (size && !ret) { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); ++ if (!mem) { ++ /* We don't have a slot if we want to trap every access. 
*/ ++ goto out; ++ } + +- ret = kvm_slot_update_flags(kml, mem, section->mr); ++ ret = kvm_slot_update_flags(kml, mem, section->mr); ++ start_addr += slot_size; ++ size -= slot_size; ++ } + + out: + kvm_slots_unlock(kml); +@@ -548,11 +554,15 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + struct kvm_dirty_log d = {}; + KVMSlot *mem; + hwaddr start_addr, size; ++ hwaddr slot_size, slot_offset = 0; + int ret = 0; + + size = kvm_align_section(section, &start_addr); +- if (size) { +- mem = kvm_lookup_matching_slot(kml, start_addr, size); ++ while (size) { ++ MemoryRegionSection subsection = *section; ++ ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { + /* We don't have a slot if we want to trap every access. */ + goto out; +@@ -570,11 +580,11 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + * So for now, let's align to 64 instead of HOST_LONG_BITS here, in + * a hope that sizeof(long) won't become >8 any time soon. 
+ */ +- size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + if (!mem->dirty_bmap) { ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(size); ++ mem->dirty_bmap = g_malloc0(bitmap_size); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -585,7 +595,13 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- kvm_get_dirty_pages_log_range(section, d.dirty_bitmap); ++ subsection.offset_within_region += slot_offset; ++ subsection.size = int128_make64(slot_size); ++ kvm_get_dirty_pages_log_range(&subsection, d.dirty_bitmap); ++ ++ slot_offset += slot_size; ++ start_addr += slot_size; ++ size -= slot_size; + } + out: + return ret; +@@ -974,6 +990,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) + return NULL; + } + ++void kvm_set_max_memslot_size(hwaddr max_slot_size) ++{ ++ g_assert( ++ ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size ++ ); ++ kvm_max_slot_size = max_slot_size; ++} ++ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) + { +@@ -981,7 +1005,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + int err; + MemoryRegion *mr = section->mr; + bool writeable = !mr->readonly && !mr->rom_device; +- hwaddr start_addr, size; ++ hwaddr start_addr, size, slot_size; + void *ram; + + if (!memory_region_is_ram(mr)) { +@@ -1006,41 +1030,52 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + kvm_slots_lock(kml); + + if (!add) { +- mem = kvm_lookup_matching_slot(kml, start_addr, size); +- if (!mem) { +- goto out; +- } +- if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { +- kvm_physical_sync_dirty_bitmap(kml, section); +- } ++ do { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); ++ if (!mem) { ++ goto out; ++ } ++ if 
(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ kvm_physical_sync_dirty_bitmap(kml, section); ++ } + +- /* unregister the slot */ +- g_free(mem->dirty_bmap); +- mem->dirty_bmap = NULL; +- mem->memory_size = 0; +- mem->flags = 0; +- err = kvm_set_user_memory_region(kml, mem, false); +- if (err) { +- fprintf(stderr, "%s: error unregistering slot: %s\n", +- __func__, strerror(-err)); +- abort(); +- } ++ /* unregister the slot */ ++ g_free(mem->dirty_bmap); ++ mem->dirty_bmap = NULL; ++ mem->memory_size = 0; ++ mem->flags = 0; ++ err = kvm_set_user_memory_region(kml, mem, false); ++ if (err) { ++ fprintf(stderr, "%s: error unregistering slot: %s\n", ++ __func__, strerror(-err)); ++ abort(); ++ } ++ start_addr += slot_size; ++ size -= slot_size; ++ } while (size); + goto out; + } + + /* register the new slot */ +- mem = kvm_alloc_slot(kml); +- mem->memory_size = size; +- mem->start_addr = start_addr; +- mem->ram = ram; +- mem->flags = kvm_mem_flags(mr); +- +- err = kvm_set_user_memory_region(kml, mem, true); +- if (err) { +- fprintf(stderr, "%s: error registering slot: %s\n", __func__, +- strerror(-err)); +- abort(); +- } ++ do { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_alloc_slot(kml); ++ mem->memory_size = slot_size; ++ mem->start_addr = start_addr; ++ mem->ram = ram; ++ mem->flags = kvm_mem_flags(mr); ++ ++ err = kvm_set_user_memory_region(kml, mem, true); ++ if (err) { ++ fprintf(stderr, "%s: error registering slot: %s\n", __func__, ++ strerror(-err)); ++ abort(); ++ } ++ start_addr += slot_size; ++ ram += slot_size; ++ size -= slot_size; ++ } while (size); + + out: + kvm_slots_unlock(kml); +@@ -2880,6 +2915,7 @@ static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, + + for (i = 0; i < kvm->nr_as; ++i) { + if (kvm->as[i].as == as && kvm->as[i].ml) { ++ size = MIN(kvm_max_slot_size, size); + return NULL != kvm_lookup_matching_slot(kvm->as[i].ml, + start_addr, size); + } +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h 
+index 787dbc7770..f8e884f146 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -43,4 +43,5 @@ typedef struct KVMMemoryListener { + void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + AddressSpace *as, int as_id); + ++void kvm_set_max_memslot_size(hwaddr max_slot_size); + #endif +-- +2.27.0 + diff --git a/libvhost-user-Fix-some-memtable-remap-cases.patch b/libvhost-user-Fix-some-memtable-remap-cases.patch new file mode 100644 index 0000000000000000000000000000000000000000..4f4d0c9f7d4d55064785426f3014ee6efbac1d63 --- /dev/null +++ b/libvhost-user-Fix-some-memtable-remap-cases.patch @@ -0,0 +1,101 @@ +From 8fa62daca5978e77ed690797a882c3d0aad8d0d4 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Mon, 12 Aug 2019 17:35:19 +0100 +Subject: [PATCH] libvhost-user: Fix some memtable remap cases + +If a new setmemtable command comes in once the vhost threads are +running, it will remap the guests address space and the threads +will now be looking in the wrong place. + +Fortunately we're running this command under lock, so we can +update the queue mappings so that threads will look in the new-right +place. + +Note: This doesn't fix things that the threads might be doing +without a lock (e.g. a readv/writev!) That's for another time. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + contrib/libvhost-user/libvhost-user.c | 33 ++++++++++++++++++++------- + contrib/libvhost-user/libvhost-user.h | 3 +++ + 2 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index fb75837032..164e6d1df8 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -559,6 +559,21 @@ vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg) + return false; + } + ++static bool ++map_ring(VuDev *dev, VuVirtq *vq) ++{ ++ vq->vring.desc = qva_to_va(dev, vq->vra.desc_user_addr); ++ vq->vring.used = qva_to_va(dev, vq->vra.used_user_addr); ++ vq->vring.avail = qva_to_va(dev, vq->vra.avail_user_addr); ++ ++ DPRINT("Setting virtq addresses:\n"); ++ DPRINT(" vring_desc at %p\n", vq->vring.desc); ++ DPRINT(" vring_used at %p\n", vq->vring.used); ++ DPRINT(" vring_avail at %p\n", vq->vring.avail); ++ ++ return !(vq->vring.desc && vq->vring.used && vq->vring.avail); ++} ++ + static bool + vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg) + { +@@ -762,6 +777,14 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) + close(vmsg->fds[i]); + } + ++ for (i = 0; i < dev->max_queues; i++) { ++ if (dev->vq[i].vring.desc) { ++ if (map_ring(dev, &dev->vq[i])) { ++ vu_panic(dev, "remaping queue %d during setmemtable", i); ++ } ++ } ++ } ++ + return false; + } + +@@ -848,18 +871,12 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) + DPRINT(" avail_user_addr: 0x%016" PRIx64 "\n", vra->avail_user_addr); + DPRINT(" log_guest_addr: 0x%016" PRIx64 "\n", vra->log_guest_addr); + ++ vq->vra = *vra; + vq->vring.flags = vra->flags; +- vq->vring.desc = qva_to_va(dev, vra->desc_user_addr); +- vq->vring.used = qva_to_va(dev, vra->used_user_addr); +- vq->vring.avail = qva_to_va(dev, vra->avail_user_addr); + vq->vring.log_guest_addr = vra->log_guest_addr; + +- DPRINT("Setting virtq addresses:\n"); +- DPRINT(" vring_desc at %p\n", 
vq->vring.desc); +- DPRINT(" vring_used at %p\n", vq->vring.used); +- DPRINT(" vring_avail at %p\n", vq->vring.avail); + +- if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) { ++ if (map_ring(dev, vq)) { + vu_panic(dev, "Invalid vring_addr message"); + return false; + } +diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h +index 1844b6f8d4..5cb7708559 100644 +--- a/contrib/libvhost-user/libvhost-user.h ++++ b/contrib/libvhost-user/libvhost-user.h +@@ -327,6 +327,9 @@ typedef struct VuVirtq { + int err_fd; + unsigned int enable; + bool started; ++ ++ /* Guest addresses of our ring */ ++ struct vhost_vring_addr vra; + } VuVirtq; + + enum VuWatchCondtion { +-- +2.27.0 + diff --git a/libvhost-user-fix-SLAVE_SEND_FD-handling.patch b/libvhost-user-fix-SLAVE_SEND_FD-handling.patch new file mode 100644 index 0000000000000000000000000000000000000000..71cbf7baa7b59006c74a8eadb9b74b10079a9a9d --- /dev/null +++ b/libvhost-user-fix-SLAVE_SEND_FD-handling.patch @@ -0,0 +1,42 @@ +From 28a9a3558a427493049723fff390add7026653eb Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Tue, 3 Sep 2019 23:04:22 +0300 +Subject: [PATCH] libvhost-user: fix SLAVE_SEND_FD handling + +It doesn't look like this could possibly work properly since +VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD is defined to 10, but the +dev->protocol_features has a bitmap. I suppose the peer this +was tested with also supported VHOST_USER_PROTOCOL_F_LOG_SHMFD, +in which case the test would always be false, but nevertheless +the code seems wrong. + +Use has_feature() to fix this. + +Fixes: d84599f56c82 ("libvhost-user: support host notifier") +Signed-off-by: Johannes Berg +Message-Id: <20190903200422.11693-1-johannes@sipsolutions.net> +Reviewed-by: Tiwei Bie +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 8726b70b449896f1211f869ec4f608904f027207) +Signed-off-by: Michael Roth +--- + contrib/libvhost-user/libvhost-user.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c +index 4b36e35a82..cb5f5770e4 100644 +--- a/contrib/libvhost-user/libvhost-user.c ++++ b/contrib/libvhost-user/libvhost-user.c +@@ -1097,7 +1097,8 @@ bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + + vmsg.fd_num = fd_num; + +- if ((dev->protocol_features & VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) == 0) { ++ if (!has_feature(dev->protocol_features, ++ VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) { + return false; + } + +-- +2.23.0 diff --git a/linux-headers-Update-against-Add-migration-support-f.patch b/linux-headers-Update-against-Add-migration-support-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..1bfef98c0c9b6771ccbe2fbd700a233e09cd9baf --- /dev/null +++ b/linux-headers-Update-against-Add-migration-support-f.patch @@ -0,0 +1,517 @@ +From 7ab9ce4016ec48e0af8010f742ee39fc84342d00 Mon Sep 17 00:00:00 2001 +From: Jinhao Gao +Date: Fri, 23 Jul 2021 14:55:12 +0800 +Subject: [PATCH] linux headers: Update against "Add migration support for VFIO + devices" + +Update linux-headers/linux/vfio.h against Linux 5.9-rc7 for the +VFIO migration support series. + +Signed-off-by: Jinhao Gao +Signed-off-by: Shenming Lu +--- + linux-headers/linux/vfio.h | 420 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 405 insertions(+), 15 deletions(-) + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 24f505199f..a90672494d 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -295,15 +295,39 @@ struct vfio_region_info_cap_type { + __u32 subtype; /* type specific */ + }; + ++/* ++ * List of region types, global per bus driver. ++ * If you introduce a new type, please add it here. 
++ */ ++ ++/* PCI region type containing a PCI vendor part */ + #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) + #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) ++#define VFIO_REGION_TYPE_GFX (1) ++#define VFIO_REGION_TYPE_CCW (2) ++#define VFIO_REGION_TYPE_MIGRATION (3) ++ ++/* sub-types for VFIO_REGION_TYPE_PCI_* */ + +-/* 8086 Vendor sub-types */ ++/* 8086 vendor PCI sub-types */ + #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) + #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) + #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) + +-#define VFIO_REGION_TYPE_GFX (1) ++/* 10de vendor PCI sub-types */ ++/* ++ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. ++ */ ++#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) ++ ++/* 1014 vendor PCI sub-types */ ++/* ++ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU ++ * to do TLB invalidation on a GPU. ++ */ ++#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) ++ ++/* sub-types for VFIO_REGION_TYPE_GFX */ + #define VFIO_REGION_SUBTYPE_GFX_EDID (1) + + /** +@@ -353,24 +377,237 @@ struct vfio_region_gfx_edid { + #define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 + }; + +-#define VFIO_REGION_TYPE_CCW (2) +-/* ccw sub-types */ ++/* sub-types for VFIO_REGION_TYPE_CCW */ + #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) ++#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) ++#define VFIO_REGION_SUBTYPE_CCW_CRW (3) + +-/* +- * 10de vendor sub-type +- * +- * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. +- */ +-#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) ++/* sub-types for VFIO_REGION_TYPE_MIGRATION */ ++#define VFIO_REGION_SUBTYPE_MIGRATION (1) + + /* +- * 1014 vendor sub-type ++ * The structure vfio_device_migration_info is placed at the 0th offset of ++ * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related ++ * migration information. Field accesses from this structure are only supported ++ * at their native width and alignment. 
Otherwise, the result is undefined and ++ * vendor drivers should return an error. + * +- * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU +- * to do TLB invalidation on a GPU. ++ * device_state: (read/write) ++ * - The user application writes to this field to inform the vendor driver ++ * about the device state to be transitioned to. ++ * - The vendor driver should take the necessary actions to change the ++ * device state. After successful transition to a given state, the ++ * vendor driver should return success on write(device_state, state) ++ * system call. If the device state transition fails, the vendor driver ++ * should return an appropriate -errno for the fault condition. ++ * - On the user application side, if the device state transition fails, ++ * that is, if write(device_state, state) returns an error, read ++ * device_state again to determine the current state of the device from ++ * the vendor driver. ++ * - The vendor driver should return previous state of the device unless ++ * the vendor driver has encountered an internal error, in which case ++ * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. ++ * - The user application must use the device reset ioctl to recover the ++ * device from VFIO_DEVICE_STATE_ERROR state. If the device is ++ * indicated to be in a valid device state by reading device_state, the ++ * user application may attempt to transition the device to any valid ++ * state reachable from the current state or terminate itself. ++ * ++ * device_state consists of 3 bits: ++ * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, ++ * it indicates the _STOP state. When the device state is changed to ++ * _STOP, driver should stop the device before write() returns. 
++ * - If bit 1 is set, it indicates the _SAVING state, which means that the ++ * driver should start gathering device state information that will be ++ * provided to the VFIO user application to save the device's state. ++ * - If bit 2 is set, it indicates the _RESUMING state, which means that ++ * the driver should prepare to resume the device. Data provided through ++ * the migration region should be used to resume the device. ++ * Bits 3 - 31 are reserved for future use. To preserve them, the user ++ * application should perform a read-modify-write operation on this ++ * field when modifying the specified bits. ++ * ++ * +------- _RESUMING ++ * |+------ _SAVING ++ * ||+----- _RUNNING ++ * ||| ++ * 000b => Device Stopped, not saving or resuming ++ * 001b => Device running, which is the default state ++ * 010b => Stop the device & save the device state, stop-and-copy state ++ * 011b => Device running and save the device state, pre-copy state ++ * 100b => Device stopped and the device state is resuming ++ * 101b => Invalid state ++ * 110b => Error state ++ * 111b => Invalid state ++ * ++ * State transitions: ++ * ++ * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP ++ * (100b) (001b) (011b) (010b) (000b) ++ * 0. Running or default state ++ * | ++ * ++ * 1. Normal Shutdown (optional) ++ * |------------------------------------->| ++ * ++ * 2. Save the state or suspend ++ * |------------------------->|---------->| ++ * ++ * 3. Save the state during live migration ++ * |----------->|------------>|---------->| ++ * ++ * 4. Resuming ++ * |<---------| ++ * ++ * 5. Resumed ++ * |--------->| ++ * ++ * 0. Default state of VFIO device is _RUNNING when the user application starts. ++ * 1. During normal shutdown of the user application, the user application may ++ * optionally change the VFIO device state from _RUNNING to _STOP. This ++ * transition is optional. The vendor driver must support this transition but ++ * must not require it. ++ * 2. 
When the user application saves state or suspends the application, the ++ * device state transitions from _RUNNING to stop-and-copy and then to _STOP. ++ * On state transition from _RUNNING to stop-and-copy, driver must stop the ++ * device, save the device state and send it to the application through the ++ * migration region. The sequence to be followed for such transition is given ++ * below. ++ * 3. In live migration of user application, the state transitions from _RUNNING ++ * to pre-copy, to stop-and-copy, and to _STOP. ++ * On state transition from _RUNNING to pre-copy, the driver should start ++ * gathering the device state while the application is still running and send ++ * the device state data to application through the migration region. ++ * On state transition from pre-copy to stop-and-copy, the driver must stop ++ * the device, save the device state and send it to the user application ++ * through the migration region. ++ * Vendor drivers must support the pre-copy state even for implementations ++ * where no data is provided to the user before the stop-and-copy state. The ++ * user must not be required to consume all migration data before the device ++ * transitions to a new state, including the stop-and-copy state. ++ * The sequence to be followed for above two transitions is given below. ++ * 4. To start the resuming phase, the device state should be transitioned from ++ * the _RUNNING to the _RESUMING state. ++ * In the _RESUMING state, the driver should use the device state data ++ * received through the migration region to resume the device. ++ * 5. After providing saved device data to the driver, the application should ++ * change the state from _RESUMING to _RUNNING. ++ * ++ * reserved: ++ * Reads on this field return zero and writes are ignored. ++ * ++ * pending_bytes: (read only) ++ * The number of pending bytes still to be migrated from the vendor driver. 
++ * ++ * data_offset: (read only) ++ * The user application should read data_offset field from the migration ++ * region. The user application should read the device data from this ++ * offset within the migration region during the _SAVING state or write ++ * the device data during the _RESUMING state. See below for details of ++ * sequence to be followed. ++ * ++ * data_size: (read/write) ++ * The user application should read data_size to get the size in bytes of ++ * the data copied in the migration region during the _SAVING state and ++ * write the size in bytes of the data copied in the migration region ++ * during the _RESUMING state. ++ * ++ * The format of the migration region is as follows: ++ * ------------------------------------------------------------------ ++ * |vfio_device_migration_info| data section | ++ * | | /////////////////////////////// | ++ * ------------------------------------------------------------------ ++ * ^ ^ ++ * offset 0-trapped part data_offset ++ * ++ * The structure vfio_device_migration_info is always followed by the data ++ * section in the region, so data_offset will always be nonzero. The offset ++ * from where the data is copied is decided by the kernel driver. The data ++ * section can be trapped, mmapped, or partitioned, depending on how the kernel ++ * driver defines the data section. The data section partition can be defined ++ * as mapped by the sparse mmap capability. If mmapped, data_offset must be ++ * page aligned, whereas initial section which contains the ++ * vfio_device_migration_info structure, might not end at the offset, which is ++ * page aligned. The user is not required to access through mmap regardless ++ * of the capabilities of the region mmap. ++ * The vendor driver should determine whether and how to partition the data ++ * section. The vendor driver should return data_offset accordingly. ++ * ++ * The sequence to be followed while in pre-copy state and stop-and-copy state ++ * is as follows: ++ * a. 
Read pending_bytes, indicating the start of a new iteration to get device ++ * data. Repeated read on pending_bytes at this stage should have no side ++ * effects. ++ * If pending_bytes == 0, the user application should not iterate to get data ++ * for that device. ++ * If pending_bytes > 0, perform the following steps. ++ * b. Read data_offset, indicating that the vendor driver should make data ++ * available through the data section. The vendor driver should return this ++ * read operation only after data is available from (region + data_offset) ++ * to (region + data_offset + data_size). ++ * c. Read data_size, which is the amount of data in bytes available through ++ * the migration region. ++ * Read on data_offset and data_size should return the offset and size of ++ * the current buffer if the user application reads data_offset and ++ * data_size more than once here. ++ * d. Read data_size bytes of data from (region + data_offset) from the ++ * migration region. ++ * e. Process the data. ++ * f. Read pending_bytes, which indicates that the data from the previous ++ * iteration has been read. If pending_bytes > 0, go to step b. ++ * ++ * The user application can transition from the _SAVING|_RUNNING ++ * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the ++ * number of pending bytes. The user application should iterate in _SAVING ++ * (stop-and-copy) until pending_bytes is 0. ++ * ++ * The sequence to be followed while _RESUMING device state is as follows: ++ * While data for this device is available, repeat the following steps: ++ * a. Read data_offset from where the user application should write data. ++ * b. Write migration data starting at the migration region + data_offset for ++ * the length determined by data_size from the migration source. ++ * c. Write data_size, which indicates to the vendor driver that data is ++ * written in the migration region. Vendor driver must return this write ++ * operations on consuming data. 
Vendor driver should apply the ++ * user-provided migration region data to the device resume state. ++ * ++ * If an error occurs during the above sequences, the vendor driver can return ++ * an error code for next read() or write() operation, which will terminate the ++ * loop. The user application should then take the next necessary action, for ++ * example, failing migration or terminating the user application. ++ * ++ * For the user application, data is opaque. The user application should write ++ * data in the same order as the data is received and the data should be of ++ * same transaction size at the source. + */ +-#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) ++ ++struct vfio_device_migration_info { ++ __u32 device_state; /* VFIO device state */ ++#define VFIO_DEVICE_STATE_STOP (0) ++#define VFIO_DEVICE_STATE_RUNNING (1 << 0) ++#define VFIO_DEVICE_STATE_SAVING (1 << 1) ++#define VFIO_DEVICE_STATE_RESUMING (1 << 2) ++#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ ++ VFIO_DEVICE_STATE_SAVING | \ ++ VFIO_DEVICE_STATE_RESUMING) ++ ++#define VFIO_DEVICE_STATE_VALID(state) \ ++ (state & VFIO_DEVICE_STATE_RESUMING ? 
\ ++ (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) ++ ++#define VFIO_DEVICE_STATE_IS_ERROR(state) \ ++ ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ ++ VFIO_DEVICE_STATE_RESUMING)) ++ ++#define VFIO_DEVICE_STATE_SET_ERROR(state) \ ++ ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_SAVING | \ ++ VFIO_DEVICE_STATE_RESUMING) ++ ++ __u32 reserved; ++ __u64 pending_bytes; ++ __u64 data_offset; ++ __u64 data_size; ++}; + + /* + * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped +@@ -570,6 +807,7 @@ enum { + + enum { + VFIO_CCW_IO_IRQ_INDEX, ++ VFIO_CCW_CRW_IRQ_INDEX, + VFIO_CCW_NUM_IRQS + }; + +@@ -700,6 +938,43 @@ struct vfio_device_ioeventfd { + + #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) + ++/** ++ * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, ++ * struct vfio_device_feature) ++ * ++ * Get, set, or probe feature data of the device. The feature is selected ++ * using the FEATURE_MASK portion of the flags field. Support for a feature ++ * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe ++ * may optionally include the GET and/or SET bits to determine read vs write ++ * access of the feature respectively. Probing a feature will return success ++ * if the feature is supported and all of the optionally indicated GET/SET ++ * methods are supported. The format of the data portion of the structure is ++ * specific to the given feature. The data portion is not required for ++ * probing. GET and SET are mutually exclusive, except for use with PROBE. ++ * ++ * Return 0 on success, -errno on failure. 
++ */ ++struct vfio_device_feature { ++ __u32 argsz; ++ __u32 flags; ++#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ ++#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ ++#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ ++#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ ++ __u8 data[]; ++}; ++ ++#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) ++ ++/* ++ * Provide support for setting a PCI VF Token, which is used as a shared ++ * secret between PF and VF drivers. This feature may only be set on a ++ * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing ++ * open VFs. Data provided when setting this feature is a 16-byte array ++ * (__u8 b[16]), representing a UUID. ++ */ ++#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** +@@ -714,7 +989,54 @@ struct vfio_iommu_type1_info { + __u32 argsz; + __u32 flags; + #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ +- __u64 iova_pgsizes; /* Bitmap of supported page sizes */ ++#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ ++ __u64 iova_pgsizes; /* Bitmap of supported page sizes */ ++ __u32 cap_offset; /* Offset within info struct of first cap */ ++}; ++ ++/* ++ * The IOVA capability allows to report the valid IOVA range(s) ++ * excluding any non-relaxable reserved regions exposed by ++ * devices attached to the container. Any DMA map attempt ++ * outside the valid iova range will return error. ++ * ++ * The structures below define version 1 of this capability. 
++ */ ++#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 ++ ++struct vfio_iova_range { ++ __u64 start; ++ __u64 end; ++}; ++ ++struct vfio_iommu_type1_info_cap_iova_range { ++ struct vfio_info_cap_header header; ++ __u32 nr_iovas; ++ __u32 reserved; ++ struct vfio_iova_range iova_ranges[]; ++}; ++ ++/* ++ * The migration capability allows to report supported features for migration. ++ * ++ * The structures below define version 1 of this capability. ++ * ++ * The existence of this capability indicates that IOMMU kernel driver supports ++ * dirty page logging. ++ * ++ * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty ++ * page logging. ++ * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap ++ * size in bytes that can be used by user applications when getting the dirty ++ * bitmap. ++ */ ++#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 ++ ++struct vfio_iommu_type1_info_cap_migration { ++ struct vfio_info_cap_header header; ++ __u32 flags; ++ __u64 pgsize_bitmap; ++ __u64 max_dirty_bitmap_size; /* in bytes */ + }; + + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) +@@ -737,6 +1059,12 @@ struct vfio_iommu_type1_dma_map { + + #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) + ++struct vfio_bitmap { ++ __u64 pgsize; /* page size for bitmap in bytes */ ++ __u64 size; /* in bytes */ ++ __u64 *data; /* one bit per page */ ++}; ++ + /** + * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_dma_unmap) +@@ -746,12 +1074,23 @@ struct vfio_iommu_type1_dma_map { + * field. No guarantee is made to the user that arbitrary unmaps of iova + * or size different from those used in the original mapping call will + * succeed. ++ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap ++ * before unmapping IO virtual addresses. When this flag is set, the user must ++ * provide a struct vfio_bitmap in data[]. 
User must provide zero-allocated ++ * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. ++ * A bit in the bitmap represents one page, of user provided page size in ++ * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set ++ * indicates that the page at that offset from iova is dirty. A Bitmap of the ++ * pages in the range of unmapped size is returned in the user-provided ++ * vfio_bitmap.data. + */ + struct vfio_iommu_type1_dma_unmap { + __u32 argsz; + __u32 flags; ++#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ ++ __u8 data[]; + }; + + #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) +@@ -763,6 +1102,57 @@ struct vfio_iommu_type1_dma_unmap { + #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) + #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) + ++/** ++ * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, ++ * struct vfio_iommu_type1_dirty_bitmap) ++ * IOCTL is used for dirty pages logging. ++ * Caller should set flag depending on which operation to perform, details as ++ * below: ++ * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs ++ * the IOMMU driver to log pages that are dirtied or potentially dirtied by ++ * the device; designed to be used when a migration is in progress. Dirty pages ++ * are logged until logging is disabled by user application by calling the IOCTL ++ * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. ++ * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs ++ * the IOMMU driver to stop logging dirtied pages. ++ * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set ++ * returns the dirty pages bitmap for IOMMU container for a given IOVA range. ++ * The user must specify the IOVA range and the pgsize through the structure ++ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. 
This interface ++ * supports getting a bitmap of the smallest supported pgsize only and can be ++ * modified in future to get a bitmap of any specified supported pgsize. The ++ * user must provide a zeroed memory area for the bitmap memory and specify its ++ * size in bitmap.size. One bit is used to represent one page consecutively ++ * starting from iova offset. The user should provide page size in bitmap.pgsize ++ * field. A bit set in the bitmap indicates that the page at that offset from ++ * iova is dirty. The caller must set argsz to a value including the size of ++ * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the ++ * actual bitmap. If dirty pages logging is not enabled, an error will be ++ * returned. ++ * ++ * Only one of the flags _START, _STOP and _GET may be specified at a time. ++ * ++ */ ++struct vfio_iommu_type1_dirty_bitmap { ++ __u32 argsz; ++ __u32 flags; ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) ++ __u8 data[]; ++}; ++ ++struct vfio_iommu_type1_dirty_bitmap_get { ++ __u64 iova; /* IO virtual address */ ++ __u64 size; /* Size of iova range */ ++ struct vfio_bitmap bitmap; ++}; ++ ++#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) ++ + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ + + /* +-- +2.27.0 + diff --git a/linux-headers-update-against-5.10-and-manual-clear-v.patch b/linux-headers-update-against-5.10-and-manual-clear-v.patch new file mode 100644 index 0000000000000000000000000000000000000000..0315fc2c1a30be23b4643c30d783e5259ef11931 --- /dev/null +++ b/linux-headers-update-against-5.10-and-manual-clear-v.patch @@ -0,0 +1,90 @@ +From 79efeccd41d761b68946df68e5431eff399ccbd5 Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:03 +0800 +Subject: [PATCH] linux-headers: update against 5.10 and manual clear vfio + dirty log series + +The 
new capability VFIO_DIRTY_LOG_MANUAL_CLEAR and the new ioctl +VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and +VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP have been introduced in +the kernel, update the header to add them. + +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + linux-headers/linux/vfio.h | 37 ++++++++++++++++++++++++++++++++++++- + 1 file changed, 36 insertions(+), 1 deletion(-) + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index a90672494d..120387ba58 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -46,6 +46,16 @@ + */ + #define VFIO_NOIOMMU_IOMMU 8 + ++/* ++ * The vfio_iommu driver may support user clears dirty log manually, which means ++ * dirty log can be requested to not cleared automatically after dirty log is ++ * copied to userspace, it's user's duty to clear dirty log. ++ * ++ * Note: please refer to VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and ++ * VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP. ++ */ ++#define VFIO_DIRTY_LOG_MANUAL_CLEAR 11 ++ + /* + * The IOCTL interface is designed for extensibility by embedding the + * structure length (argsz) and flags into structures passed between +@@ -1074,6 +1084,7 @@ struct vfio_bitmap { + * field. No guarantee is made to the user that arbitrary unmaps of iova + * or size different from those used in the original mapping call will + * succeed. ++ * + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap + * before unmapping IO virtual addresses. When this flag is set, the user must + * provide a struct vfio_bitmap in data[]. User must provide zero-allocated +@@ -1133,8 +1144,30 @@ struct vfio_iommu_type1_dma_unmap { + * actual bitmap. If dirty pages logging is not enabled, an error will be + * returned. + * +- * Only one of the flags _START, _STOP and _GET may be specified at a time. 
++ * The VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR flag is almost same as ++ * VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP, except that it requires underlying ++ * dirty bitmap is not cleared automatically. The user can clear it manually by ++ * calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set. + * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set, ++ * instructs the IOMMU driver to clear the dirty status of pages in a bitmap ++ * for IOMMU container for a given IOVA range. The user must specify the IOVA ++ * range, the bitmap and the pgsize through the structure ++ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface ++ * supports clearing a bitmap of the smallest supported pgsize only and can be ++ * modified in future to clear a bitmap of any specified supported pgsize. The ++ * user must provide a memory area for the bitmap memory and specify its size ++ * in bitmap.size. One bit is used to represent one page consecutively starting ++ * from iova offset. The user should provide page size in bitmap.pgsize field. ++ * A bit set in the bitmap indicates that the page at that offset from iova is ++ * cleared the dirty status, and dirty tracking is re-enabled for that page. The ++ * caller must set argsz to a value including the size of structure ++ * vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the actual bitmap. If ++ * dirty pages logging is not enabled, an error will be returned. Note: user ++ * should clear dirty log before handle corresponding dirty pages. ++ * ++ * Only one of the flags _START, _STOP, _GET, _GET_NOCLEAR, and _CLEAR may be ++ * specified at a time. 
+ */ + struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; +@@ -1142,6 +1175,8 @@ struct vfio_iommu_type1_dirty_bitmap { + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR (1 << 3) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP (1 << 4) + __u8 data[]; + }; + +-- +2.27.0 + diff --git a/linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch b/linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch index 3d4e4ad5bb7e833e97c2d51f9ab2cf5ae555e823..731d06a74024c81bcc1ececeb79da2b873c2546f 100644 --- a/linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch +++ b/linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch @@ -1,7 +1,7 @@ -From 896b9892d4df316b85836daa973e442c0c64cec6 Mon Sep 17 00:00:00 2001 +From 27a9f40b308efd8ddcb81e286441865b5a0cb541 Mon Sep 17 00:00:00 2001 From: Zenghui Yu -Date: Fri, 3 Jan 2020 17:16:55 +0800 -Subject: [PATCH 1/3] linux headers: update against "KVM/ARM: Fix >256 vcpus" +Date: Tue, 14 Apr 2020 21:52:42 +0800 +Subject: [PATCH] linux headers: update against "KVM/ARM: Fix >256 vcpus" This is part of upstream commit f363d039e883 ("linux headers: update against v5.4-rc1"), authored by Eric Auger . 
@@ -19,7 +19,7 @@ index e1f8b745..137a2730 100644 +++ b/linux-headers/asm-arm/kvm.h @@ -254,8 +254,10 @@ struct kvm_vcpu_events { #define KVM_DEV_ARM_ITS_CTRL_RESET 4 - + /* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_VCPU2_SHIFT 28 +#define KVM_ARM_IRQ_VCPU2_MASK 0xf @@ -30,12 +30,12 @@ index e1f8b745..137a2730 100644 #define KVM_ARM_IRQ_VCPU_MASK 0xff #define KVM_ARM_IRQ_NUM_SHIFT 0 diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h -index e6a98c14..dfd3a028 100644 +index 2431ec35..cdfd5f33 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h -@@ -265,8 +265,10 @@ struct kvm_vcpu_events { +@@ -308,8 +308,10 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 - + /* KVM_IRQ_LINE irq field index values */ +#define KVM_ARM_IRQ_VCPU2_SHIFT 28 +#define KVM_ARM_IRQ_VCPU2_MASK 0xf @@ -46,17 +46,16 @@ index e6a98c14..dfd3a028 100644 #define KVM_ARM_IRQ_VCPU_MASK 0xff #define KVM_ARM_IRQ_NUM_SHIFT 0 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h -index b53ee597..086cea4d 100644 +index c8423e76..744e888e 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_VM_IPA_SIZE 165 - #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 + #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 - - #ifdef KVM_CAP_IRQ_ROUTING - --- -2.19.1 - + #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 + #define KVM_CAP_PPC_IRQ_XIVE 169 + #define KVM_CAP_ARM_SVE 170 +-- +2.23.0 diff --git a/linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch b/linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch new file mode 100644 index 0000000000000000000000000000000000000000..2d0c6abf3d233a0694cec23a2097011c39d4fd1f --- /dev/null +++ b/linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch @@ -0,0 +1,34 @@ +From 
7b4aded3f772ef43e2b600594f755eadd5da5958 Mon Sep 17 00:00:00 2001 +From: Jonathan Marler +Date: Sat, 2 May 2020 10:12:25 -0600 +Subject: [PATCH 3/5] linux-user/mmap.c: fix integer underflow in target_mremap + +Fixes: https://bugs.launchpad.net/bugs/1876373 + +This code path in mmap occurs when a page size is decreased with mremap. When a section of pages is shrunk, qemu calls mmap_reserve on the pages that were released. However, it has the diff operation reversed, subtracting the larger old_size from the smaller new_size. Instead, it should be subtracting the smaller new_size from the larger old_size. You can also see in the previous line of the change that this mmap_reserve call only occurs when old_size > new_size. + +Bug: https://bugs.launchpad.net/qemu/+bug/1876373 +Signed-off-by: Jonathan Marler +Reviewded-by: Laurent Vivier +Message-Id: <20200502161225.14346-1-johnnymarler@gmail.com> +Signed-off-by: Laurent Vivier +--- + linux-user/mmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/linux-user/mmap.c b/linux-user/mmap.c +index 46a6e3a7..2a9ca0c3 100644 +--- a/linux-user/mmap.c ++++ b/linux-user/mmap.c +@@ -740,7 +740,7 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, + if (prot == 0) { + host_addr = mremap(g2h(old_addr), old_size, new_size, flags); + if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) { +- mmap_reserve(old_addr + old_size, new_size - old_size); ++ mmap_reserve(old_addr + old_size, old_size - new_size); + } + } else { + errno = ENOMEM; +-- +2.23.0 + diff --git a/lm32-do-not-leak-memory-on-object_new-object_unref.patch b/lm32-do-not-leak-memory-on-object_new-object_unref.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ccc53684bb3d3224757209a4c1710883214fcc8 --- /dev/null +++ b/lm32-do-not-leak-memory-on-object_new-object_unref.patch @@ -0,0 +1,77 @@ +From d50be5295c49be1b6024f5902948b52e683b4c23 Mon Sep 17 00:00:00 2001 +From: lizhengui +Date: Wed, 9 Sep 2020 
14:18:35 +0800 +Subject: [PATCH] lm32: do not leak memory on object_new/object_unref +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Bottom halves and ptimers are malloced, but nothing in these +files is freeing memory allocated by instance_init. Since +these are sysctl devices that are never unrealized, just moving +the allocations to realize is enough to avoid the leak in +practice (and also to avoid upsetting asan when running +device-introspect-test). + +Signed-off-by: Paolo Bonzini +Reviewed-by: Philippe Mathieu-Daudé +--- + hw/timer/lm32_timer.c | 6 +++--- + hw/timer/milkymist-sysctl.c | 10 +++++----- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/hw/timer/lm32_timer.c b/hw/timer/lm32_timer.c +index 6ce876c6..13f15825 100644 +--- a/hw/timer/lm32_timer.c ++++ b/hw/timer/lm32_timer.c +@@ -184,9 +184,6 @@ static void lm32_timer_init(Object *obj) + + sysbus_init_irq(dev, &s->irq); + +- s->bh = qemu_bh_new(timer_hit, s); +- s->ptimer = ptimer_init(s->bh, PTIMER_POLICY_DEFAULT); +- + memory_region_init_io(&s->iomem, obj, &timer_ops, s, + "timer", R_MAX * 4); + sysbus_init_mmio(dev, &s->iomem); +@@ -196,6 +193,9 @@ static void lm32_timer_realize(DeviceState *dev, Error **errp) + { + LM32TimerState *s = LM32_TIMER(dev); + ++ s->bh = qemu_bh_new(timer_hit, s); ++ s->ptimer = ptimer_init(s->bh, PTIMER_POLICY_DEFAULT); ++ + ptimer_set_freq(s->ptimer, s->freq_hz); + } + +diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c +index a9d25087..2f1ecc6d 100644 +--- a/hw/timer/milkymist-sysctl.c ++++ b/hw/timer/milkymist-sysctl.c +@@ -280,11 +280,6 @@ static void milkymist_sysctl_init(Object *obj) + sysbus_init_irq(dev, &s->timer0_irq); + sysbus_init_irq(dev, &s->timer1_irq); + +- s->bh0 = qemu_bh_new(timer0_hit, s); +- s->bh1 = qemu_bh_new(timer1_hit, s); +- s->ptimer0 = ptimer_init(s->bh0, PTIMER_POLICY_DEFAULT); +- s->ptimer1 = ptimer_init(s->bh1, PTIMER_POLICY_DEFAULT); +- + 
memory_region_init_io(&s->regs_region, obj, &sysctl_mmio_ops, s, + "milkymist-sysctl", R_MAX * 4); + sysbus_init_mmio(dev, &s->regs_region); +@@ -294,6 +289,11 @@ static void milkymist_sysctl_realize(DeviceState *dev, Error **errp) + { + MilkymistSysctlState *s = MILKYMIST_SYSCTL(dev); + ++ s->bh0 = qemu_bh_new(timer0_hit, s); ++ s->bh1 = qemu_bh_new(timer1_hit, s); ++ s->ptimer0 = ptimer_init(s->bh0, PTIMER_POLICY_DEFAULT); ++ s->ptimer1 = ptimer_init(s->bh1, PTIMER_POLICY_DEFAULT); ++ + ptimer_set_freq(s->ptimer0, s->freq_hz); + ptimer_set_freq(s->ptimer1, s->freq_hz); + } +-- +2.19.1 + diff --git a/log-Add-some-logs-on-VM-runtime-path.patch b/log-Add-some-logs-on-VM-runtime-path.patch new file mode 100644 index 0000000000000000000000000000000000000000..80eb8c39b4bcc4884c5a8fbfa43f28b808efb912 --- /dev/null +++ b/log-Add-some-logs-on-VM-runtime-path.patch @@ -0,0 +1,181 @@ +From 0c83403e6e3ab21a01941be4ec57b02388eeb9c4 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Fri, 22 May 2020 18:56:09 +0800 +Subject: [PATCH] log: Add some logs on VM runtime path + +Add logs on VM runtime path, to make it easier to do trouble shooting. + +Signed-off-by: Ying Fang + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index f6d2223..b4b0ed2 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -32,6 +32,7 @@ + #include "qemu/range.h" + #include "hw/virtio/virtio-bus.h" + #include "qapi/visitor.h" ++#include "qemu/log.h" + + #define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev)) + +@@ -1659,7 +1660,9 @@ static void virtio_pci_device_unplugged(DeviceState *d) + VirtIOPCIProxy *proxy = VIRTIO_PCI(d); + bool modern = virtio_pci_modern(proxy); + bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + ++ qemu_log("unplug device name: %s\n", !vdev ? 
"NULL" : vdev->name); + virtio_pci_stop_ioeventfd(proxy); + + if (modern) { +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 7c3822c..79c2dcf 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -1172,7 +1172,14 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) + k->set_status(vdev, val); + } + vdev->status = val; +- ++ if (val) { ++ qemu_log("%s device status is %d that means %s\n", ++ vdev->name, val, ++ (val & VIRTIO_CONFIG_S_DRIVER_OK) ? "DRIVER OK" : ++ (val & VIRTIO_CONFIG_S_DRIVER) ? "DRIVER" : ++ (val & VIRTIO_CONFIG_S_ACKNOWLEDGE) ? "ACKNOWLEDGE" : ++ (val & VIRTIO_CONFIG_S_FAILED) ? "FAILED" : "UNKNOWN"); ++ } + return 0; + } + +@@ -1614,8 +1621,11 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + break; + } + +- if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) ++ if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) { ++ qemu_log("unacceptable queue_size (%d) or num (%d)\n", ++ queue_size, i); + abort(); ++ } + + vdev->vq[i].vring.num = queue_size; + vdev->vq[i].vring.num_default = queue_size; +diff --git a/monitor/monitor.c b/monitor/monitor.c +index 3ef2817..6f726e8 100644 +--- a/monitor/monitor.c ++++ b/monitor/monitor.c +@@ -28,6 +28,7 @@ + #include "qapi/qapi-emit-events.h" + #include "qapi/qmp/qdict.h" + #include "qapi/qmp/qstring.h" ++#include "qapi/qmp/qjson.h" + #include "qemu/error-report.h" + #include "qemu/option.h" + #include "sysemu/qtest.h" +@@ -254,6 +255,7 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) + { + Monitor *mon; + MonitorQMP *qmp_mon; ++ QString *json; + + trace_monitor_protocol_event_emit(event, qdict); + QTAILQ_FOREACH(mon, &mon_list, entry) { +@@ -264,6 +266,13 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) + qmp_mon = container_of(mon, MonitorQMP, common); + if (qmp_mon->commands != &qmp_cap_negotiation_commands) { + qmp_send_response(qmp_mon, qdict); ++ json = qobject_to_json(QOBJECT(qdict)); ++ if (json) { 
++ if (!strstr(json->string, "RTC_CHANGE")) { ++ qemu_log("%s\n", qstring_get_str(json)); ++ } ++ qobject_unref(json); ++ } + } + } + } +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index e2c366e..6dfdad5 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -17,7 +17,9 @@ + #include "qapi/qmp/qdict.h" + #include "qapi/qmp/qjson.h" + #include "qapi/qmp/qbool.h" ++#include "qapi/qmp/qstring.h" + #include "sysemu/sysemu.h" ++#include "qemu/log.h" + + static QDict *qmp_dispatch_check_obj(const QObject *request, bool allow_oob, + Error **errp) +@@ -83,6 +85,7 @@ static QObject *do_qmp_dispatch(QmpCommandList *cmds, QObject *request, + const char *command; + QDict *args, *dict; + QmpCommand *cmd; ++ QString *json; + QObject *ret = NULL; + + dict = qmp_dispatch_check_obj(request, allow_oob, errp); +@@ -128,6 +131,19 @@ static QObject *do_qmp_dispatch(QmpCommandList *cmds, QObject *request, + qobject_ref(args); + } + ++ json = qobject_to_json(QOBJECT(args)); ++ if (json) { ++ if ((strcmp(command, "query-block-jobs") != 0) ++ && (strcmp(command, "query-migrate") != 0) ++ && (strcmp(command, "query-blockstats") != 0) ++ && (strcmp(command, "query-balloon") != 0) ++ && (strcmp(command, "set_password") != 0)) { ++ qemu_log("qmp_cmd_name: %s, arguments: %s\n", ++ command, qstring_get_str(json)); ++ } ++ qobject_unref(json); ++ } ++ + cmd->fn(args, &ret, &local_err); + if (local_err) { + error_propagate(errp, local_err); +diff --git a/qdev-monitor.c b/qdev-monitor.c +index 58222c2..c6c1d3f 100644 +--- a/qdev-monitor.c ++++ b/qdev-monitor.c +@@ -34,6 +34,7 @@ + #include "qemu/qemu-print.h" + #include "sysemu/block-backend.h" + #include "migration/misc.h" ++#include "qemu/log.h" + + /* + * Aliases were a bad idea from the start. 
Let's keep them +@@ -586,6 +587,7 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { ++ error_setg(errp, "can not find bus for %s", driver); + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { +@@ -627,6 +629,8 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + + /* set properties */ + if (qemu_opt_foreach(opts, set_property, dev, &err)) { ++ error_setg(errp, "the bus %s -driver %s set property failed", ++ bus ? bus->name : "None", driver); + goto err_del_dev; + } + +@@ -636,6 +640,8 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + dev->opts = NULL; + goto err_del_dev; + } ++ qemu_log("add qdev %s:%s success\n", driver, ++ qemu_opts_id(opts) ? qemu_opts_id(opts) : "none"); + return dev; + + err_del_dev: +-- +1.8.3.1 + diff --git a/make-check-unit-use-after-free-in-test-opts-visitor.patch b/make-check-unit-use-after-free-in-test-opts-visitor.patch new file mode 100644 index 0000000000000000000000000000000000000000..590970004769b464b68977639a0e5e823bb9b9ac --- /dev/null +++ b/make-check-unit-use-after-free-in-test-opts-visitor.patch @@ -0,0 +1,102 @@ +From e3dfb5d2848975e9e947cb894afac87ce386a2bc Mon Sep 17 00:00:00 2001 +From: lizhengui +Date: Wed, 9 Sep 2020 15:18:52 +0800 +Subject: [PATCH] make check-unit: use after free in test-opts-visitor + +In the struct OptsVisitor, the 'repeated_opts' member points to a list +in the 'unprocessed_opts' hash table after the list has been destroyed. +A subsequent call to visit_type_int() references the deleted list. +It results in use-after-free issue reproduced by running the test case +under the Valgrind: valgrind tests/test-opts-visitor. +A new mode ListMode::LM_TRAVERSED is declared to mark the list +traversal completed. 
+ +Suggested-by: Markus Armbruster +Signed-off-by: Andrey Shinkevich +Message-Id: <1565024586-387112-1-git-send-email-andrey.shinkevich@virtuozzo.com> +--- + qapi/opts-visitor.c | 26 ++++++++++++++++++++++---- + 1 file changed, 22 insertions(+), 4 deletions(-) + +diff --git a/qapi/opts-visitor.c b/qapi/opts-visitor.c +index 324b1974..42d87df6 100644 +--- a/qapi/opts-visitor.c ++++ b/qapi/opts-visitor.c +@@ -24,7 +24,8 @@ enum ListMode + { + LM_NONE, /* not traversing a list of repeated options */ + +- LM_IN_PROGRESS, /* opts_next_list() ready to be called. ++ LM_IN_PROGRESS, /* ++ * opts_next_list() ready to be called. + * + * Generating the next list link will consume the most + * recently parsed QemuOpt instance of the repeated +@@ -36,7 +37,8 @@ enum ListMode + * LM_UNSIGNED_INTERVAL. + */ + +- LM_SIGNED_INTERVAL, /* opts_next_list() has been called. ++ LM_SIGNED_INTERVAL, /* ++ * opts_next_list() has been called. + * + * Generating the next list link will consume the most + * recently stored element from the signed interval, +@@ -48,7 +50,14 @@ enum ListMode + * next element of the signed interval. + */ + +- LM_UNSIGNED_INTERVAL /* Same as above, only for an unsigned interval. */ ++ LM_UNSIGNED_INTERVAL, /* Same as above, only for an unsigned interval. */ ++ ++ LM_TRAVERSED /* ++ * opts_next_list() has been called. ++ * ++ * No more QemuOpt instance in the list. ++ * The traversal has been completed. 
++ */ + }; + + typedef enum ListMode ListMode; +@@ -238,6 +247,8 @@ opts_next_list(Visitor *v, GenericList *tail, size_t size) + OptsVisitor *ov = to_ov(v); + + switch (ov->list_mode) { ++ case LM_TRAVERSED: ++ return NULL; + case LM_SIGNED_INTERVAL: + case LM_UNSIGNED_INTERVAL: + if (ov->list_mode == LM_SIGNED_INTERVAL) { +@@ -258,6 +269,8 @@ opts_next_list(Visitor *v, GenericList *tail, size_t size) + opt = g_queue_pop_head(ov->repeated_opts); + if (g_queue_is_empty(ov->repeated_opts)) { + g_hash_table_remove(ov->unprocessed_opts, opt->name); ++ ov->repeated_opts = NULL; ++ ov->list_mode = LM_TRAVERSED; + return NULL; + } + break; +@@ -289,7 +302,8 @@ opts_end_list(Visitor *v, void **obj) + + assert(ov->list_mode == LM_IN_PROGRESS || + ov->list_mode == LM_SIGNED_INTERVAL || +- ov->list_mode == LM_UNSIGNED_INTERVAL); ++ ov->list_mode == LM_UNSIGNED_INTERVAL || ++ ov->list_mode == LM_TRAVERSED); + ov->repeated_opts = NULL; + ov->list_mode = LM_NONE; + } +@@ -306,6 +320,10 @@ lookup_scalar(const OptsVisitor *ov, const char *name, Error **errp) + list = lookup_distinct(ov, name, errp); + return list ? 
g_queue_peek_tail(list) : NULL; + } ++ if (ov->list_mode == LM_TRAVERSED) { ++ error_setg(errp, "Fewer list elements than expected"); ++ return NULL; ++ } + assert(ov->list_mode == LM_IN_PROGRESS); + return g_queue_peek_head(ov->repeated_opts); + } +-- +2.19.1 + diff --git a/make-release-pull-in-edk2-submodules-so-we-can-build.patch b/make-release-pull-in-edk2-submodules-so-we-can-build.patch new file mode 100644 index 0000000000000000000000000000000000000000..70bcc864d0fd976919e540165bc7167e5026c46e --- /dev/null +++ b/make-release-pull-in-edk2-submodules-so-we-can-build.patch @@ -0,0 +1,60 @@ +From c5c9b1362d1652a9d0f79f6d9ae2f80d4b5fe432 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 12 Sep 2019 18:12:01 -0500 +Subject: [PATCH] make-release: pull in edk2 submodules so we can build it from + tarballs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The `make efi` target added by 536d2173 is built from the roms/edk2 +submodule, which in turn relies on additional submodules nested under +roms/edk2. + +The make-release script currently only pulls in top-level submodules, +so these nested submodules are missing in the resulting tarball. + +We could try to address this situation more generally by recursively +pulling in all submodules, but this doesn't necessarily ensure the +end-result will build properly (this case also required other changes). + +Additionally, due to the nature of submodules, we may not always have +control over how these sorts of things are dealt with, so for now we +continue to handle it on a case-by-case in the make-release script. 
+ +Cc: Laszlo Ersek +Cc: Bruce Rogers +Cc: qemu-stable@nongnu.org # v4.1.0 +Reported-by: Bruce Rogers +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Michael Roth +Message-Id: <20190912231202.12327-2-mdroth@linux.vnet.ibm.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 45c61c6c23918e3b05ed9ecac5b2328ebae5f774) +Signed-off-by: Michael Roth +--- + scripts/make-release | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/scripts/make-release b/scripts/make-release +index b4af9c9e52..a2a8cda33c 100755 +--- a/scripts/make-release ++++ b/scripts/make-release +@@ -20,6 +20,14 @@ git checkout "v${version}" + git submodule update --init + (cd roms/seabios && git describe --tags --long --dirty > .version) + (cd roms/skiboot && ./make_version.sh > .version) ++# Fetch edk2 submodule's submodules, since it won't have access to them via ++# the tarball later. ++# ++# A more uniform way to handle this sort of situation would be nice, but we ++# don't necessarily have much control over how a submodule handles its ++# submodule dependencies, so we continue to handle these on a case-by-case ++# basis for now. ++(cd roms/edk2 && git submodule update --init) + popd + tar --exclude=.git -cjf ${destination}.tar.bz2 ${destination} + rm -rf ${destination} +-- +2.23.0 diff --git a/mcf5208-fix-leak-from-qemu_allocate_irqs.patch b/mcf5208-fix-leak-from-qemu_allocate_irqs.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e254f577e4f08bc332bb94dda769ce9a584c623 --- /dev/null +++ b/mcf5208-fix-leak-from-qemu_allocate_irqs.patch @@ -0,0 +1,29 @@ +From 07b7cdb648124748c34be299fbfdfe3b6e38a521 Mon Sep 17 00:00:00 2001 +From: lizhengui +Date: Wed, 9 Sep 2020 14:53:00 +0800 +Subject: [PATCH] mcf5208: fix leak from qemu_allocate_irqs + +The array returned by qemu_allocate_irqs is malloced, free it. 
+ +Signed-off-by: Paolo Bonzini +Reviewed-by: Thomas Huth +--- + hw/m68k/mcf5208.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c +index 6f6efae9..cc765eac 100644 +--- a/hw/m68k/mcf5208.c ++++ b/hw/m68k/mcf5208.c +@@ -270,6 +270,8 @@ static void mcf5208evb_init(MachineState *machine) + 0xfc030000, pic + 36); + } + ++ g_free(pic); ++ + /* 0xfc000000 SCM. */ + /* 0xfc004000 XBS. */ + /* 0xfc008000 FlexBus CS. */ +-- +2.19.1 + diff --git a/megasas-avoid-NULL-pointer-dereference.patch b/megasas-avoid-NULL-pointer-dereference.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7bc95901d82110b49e65ccab6cd9a84dc562aa0 --- /dev/null +++ b/megasas-avoid-NULL-pointer-dereference.patch @@ -0,0 +1,36 @@ +From cf7f42b21aaa7694c6232a9a5027de9df341f299 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 14 May 2020 00:55:39 +0530 +Subject: [PATCH 5/9] megasas: avoid NULL pointer dereference + +While in megasas_handle_frame(), megasas_enqueue_frame() may +set a NULL frame into MegasasCmd object for a given 'frame_addr' +address. Add check to avoid a NULL pointer dereference issue. 
+ +Reported-by: Alexander Bulekov +Fixes: https://bugs.launchpad.net/qemu/+bug/1878259 +Signed-off-by: Prasad J Pandit +Acked-by: Alexander Bulekov +Reviewed-by: Darren Kenny +Message-Id: <20200513192540.1583887-3-ppandit@redhat.com> +Signed-off-by: Paolo Bonzini +--- + hw/scsi/megasas.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c +index 7ee331d9da..5923ffbd22 100644 +--- a/hw/scsi/megasas.c ++++ b/hw/scsi/megasas.c +@@ -503,7 +503,7 @@ static MegasasCmd *megasas_enqueue_frame(MegasasState *s, + cmd->pa = frame; + /* Map all possible frames */ + cmd->frame = pci_dma_map(pcid, frame, &frame_size_p, 0); +- if (frame_size_p != frame_size) { ++ if (!cmd->frame || frame_size_p != frame_size) { + trace_megasas_qf_map_failed(cmd->index, (unsigned long)frame); + if (cmd->frame) { + megasas_unmap_frame(s, cmd); +-- +2.25.1 + diff --git a/megasas-use-unsigned-type-for-positive-numeric-field.patch b/megasas-use-unsigned-type-for-positive-numeric-field.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e194395193623e061917b0a5e6315d6b8564a61 --- /dev/null +++ b/megasas-use-unsigned-type-for-positive-numeric-field.patch @@ -0,0 +1,97 @@ +From 7bad515189482d289d3efe4133c8af9f184662e4 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 14 May 2020 00:55:40 +0530 +Subject: [PATCH 6/9] megasas: use unsigned type for positive numeric fields + +Use unsigned type for the MegasasState fields which hold positive +numeric values. 
+ +Signed-off-by: Prasad J Pandit +Reviewed-by: Darren Kenny +Message-Id: <20200513192540.1583887-4-ppandit@redhat.com> +Signed-off-by: Paolo Bonzini +--- + hw/scsi/megasas.c | 38 +++++++++++++++++++------------------- + 1 file changed, 19 insertions(+), 19 deletions(-) + +diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c +index 5923ffbd22..94469e8169 100644 +--- a/hw/scsi/megasas.c ++++ b/hw/scsi/megasas.c +@@ -85,34 +85,34 @@ typedef struct MegasasState { + MemoryRegion queue_io; + uint32_t frame_hi; + +- int fw_state; ++ uint32_t fw_state; + uint32_t fw_sge; + uint32_t fw_cmds; + uint32_t flags; +- int fw_luns; +- int intr_mask; +- int doorbell; +- int busy; +- int diag; +- int adp_reset; ++ uint32_t fw_luns; ++ uint32_t intr_mask; ++ uint32_t doorbell; ++ uint32_t busy; ++ uint32_t diag; ++ uint32_t adp_reset; + OnOffAuto msi; + OnOffAuto msix; + + MegasasCmd *event_cmd; +- int event_locale; ++ uint16_t event_locale; + int event_class; +- int event_count; +- int shutdown_event; +- int boot_event; ++ uint32_t event_count; ++ uint32_t shutdown_event; ++ uint32_t boot_event; + + uint64_t sas_addr; + char *hba_serial; + + uint64_t reply_queue_pa; + void *reply_queue; +- int reply_queue_len; ++ uint16_t reply_queue_len; + uint16_t reply_queue_head; +- int reply_queue_tail; ++ uint16_t reply_queue_tail; + uint64_t consumer_pa; + uint64_t producer_pa; + +@@ -2258,9 +2258,9 @@ static const VMStateDescription vmstate_megasas_gen1 = { + VMSTATE_PCI_DEVICE(parent_obj, MegasasState), + VMSTATE_MSIX(parent_obj, MegasasState), + +- VMSTATE_INT32(fw_state, MegasasState), +- VMSTATE_INT32(intr_mask, MegasasState), +- VMSTATE_INT32(doorbell, MegasasState), ++ VMSTATE_UINT32(fw_state, MegasasState), ++ VMSTATE_UINT32(intr_mask, MegasasState), ++ VMSTATE_UINT32(doorbell, MegasasState), + VMSTATE_UINT64(reply_queue_pa, MegasasState), + VMSTATE_UINT64(consumer_pa, MegasasState), + VMSTATE_UINT64(producer_pa, MegasasState), +@@ -2277,9 +2277,9 @@ static const VMStateDescription 
vmstate_megasas_gen2 = { + VMSTATE_PCI_DEVICE(parent_obj, MegasasState), + VMSTATE_MSIX(parent_obj, MegasasState), + +- VMSTATE_INT32(fw_state, MegasasState), +- VMSTATE_INT32(intr_mask, MegasasState), +- VMSTATE_INT32(doorbell, MegasasState), ++ VMSTATE_UINT32(fw_state, MegasasState), ++ VMSTATE_UINT32(intr_mask, MegasasState), ++ VMSTATE_UINT32(doorbell, MegasasState), + VMSTATE_UINT64(reply_queue_pa, MegasasState), + VMSTATE_UINT64(consumer_pa, MegasasState), + VMSTATE_UINT64(producer_pa, MegasasState), +-- +2.25.1 + diff --git a/megasas-use-unsigned-type-for-reply_queue_head-and-c.patch b/megasas-use-unsigned-type-for-reply_queue_head-and-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..507aeafb6911562d542f06f91c75a3dd90f43478 --- /dev/null +++ b/megasas-use-unsigned-type-for-reply_queue_head-and-c.patch @@ -0,0 +1,51 @@ +From e081fb1058e357d4d7adc30201013a46123fe2ae Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 14 May 2020 00:55:38 +0530 +Subject: [PATCH 4/9] megasas: use unsigned type for reply_queue_head and check + index + +A guest user may set 'reply_queue_head' field of MegasasState to +a negative value. Later in 'megasas_lookup_frame' it is used to +index into s->frames[] array. Use unsigned type to avoid OOB +access issue. + +Also check that 'index' value stays within s->frames[] bounds +through the while() loop in 'megasas_lookup_frame' to avoid OOB +access. 
+ +Reported-by: Ren Ding +Reported-by: Hanqing Zhao +Reported-by: Alexander Bulekov +Signed-off-by: Prasad J Pandit +Acked-by: Alexander Bulekov +Message-Id: <20200513192540.1583887-2-ppandit@redhat.com> +Signed-off-by: Paolo Bonzini +--- + hw/scsi/megasas.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c +index 0c4399930a..7ee331d9da 100644 +--- a/hw/scsi/megasas.c ++++ b/hw/scsi/megasas.c +@@ -111,7 +111,7 @@ typedef struct MegasasState { + uint64_t reply_queue_pa; + void *reply_queue; + int reply_queue_len; +- int reply_queue_head; ++ uint16_t reply_queue_head; + int reply_queue_tail; + uint64_t consumer_pa; + uint64_t producer_pa; +@@ -444,7 +444,7 @@ static MegasasCmd *megasas_lookup_frame(MegasasState *s, + + index = s->reply_queue_head; + +- while (num < s->fw_cmds) { ++ while (num < s->fw_cmds && index < MEGASAS_MAX_FRAMES) { + if (s->frames[index].pa && s->frames[index].pa == frame) { + cmd = &s->frames[index]; + break; +-- +2.25.1 + diff --git a/memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch b/memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch new file mode 100644 index 0000000000000000000000000000000000000000..c67de46045dcbdca04a8a78d8ca0d44b27a794c2 --- /dev/null +++ b/memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch @@ -0,0 +1,32 @@ +From b7f4f3b71a179a21a90ca32ef7d6ea000fb0e3bd Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 25 Mar 2019 16:35:05 +0100 +Subject: [PATCH] memory: Add IOMMU_ATTR_MSI_TRANSLATE IOMMU memory region + attribute + +We introduce a new IOMMU Memory Region attribute, IOMMU_ATTR_MSI_TRANSLATE +which tells whether the virtual IOMMU translates MSIs. ARM SMMU +will expose this attribute since, as opposed to Intel DMAR, MSIs +are translated as any other DMA requests. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + include/exec/memory.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 74606e14aa..716b07e115 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -242,6 +242,7 @@ struct MemoryRegionOps { + enum IOMMUMemoryRegionAttr { + IOMMU_ATTR_SPAPR_TCE_FD, + IOMMU_ATTR_VFIO_NESTED, ++ IOMMU_ATTR_MSI_TRANSLATE, + }; + + /** +-- +2.27.0 + diff --git a/memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch b/memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch new file mode 100644 index 0000000000000000000000000000000000000000..3932161dc8aeb2377a64f77c1ccc2e8a5c0d9a6a --- /dev/null +++ b/memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch @@ -0,0 +1,72 @@ +From 5f4291f431add76b8754a5fb2d62ab4108ece73f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 1 Jul 2019 11:30:30 +0200 +Subject: [PATCH] memory: Add IOMMU_ATTR_VFIO_NESTED IOMMU memory region + attribute + +We introduce a new IOMMU Memory Region attribute, +IOMMU_ATTR_VFIO_NESTED that tells whether the virtual IOMMU +requires HW nested paging for VFIO integration. + +Current Intel virtual IOMMU device supports "Caching +Mode" and does not require 2 stages at physical level to be +integrated with VFIO. However SMMUv3 does not implement such +"caching mode" and requires to use HW nested paging. + +As such SMMUv3 is the first IOMMU device to advertise this +attribute. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/arm/smmuv3.c | 12 ++++++++++++ + include/exec/memory.h | 3 ++- + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 0ef1ca376c..55eed5189e 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1531,6 +1531,17 @@ static void smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu, + } + } + ++static int smmuv3_get_attr(IOMMUMemoryRegion *iommu, ++ enum IOMMUMemoryRegionAttr attr, ++ void *data) ++{ ++ if (attr == IOMMU_ATTR_VFIO_NESTED) { ++ *(bool *) data = true; ++ return 0; ++ } ++ return -EINVAL; ++} ++ + static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, + void *data) + { +@@ -1538,6 +1549,7 @@ static void smmuv3_iommu_memory_region_class_init(ObjectClass *klass, + + imrc->translate = smmuv3_translate; + imrc->notify_flag_changed = smmuv3_notify_flag_changed; ++ imrc->get_attr = smmuv3_get_attr; + } + + static const TypeInfo smmuv3_type_info = { +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 3c5206dce6..74606e14aa 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -240,7 +240,8 @@ struct MemoryRegionOps { + }; + + enum IOMMUMemoryRegionAttr { +- IOMMU_ATTR_SPAPR_TCE_FD ++ IOMMU_ATTR_SPAPR_TCE_FD, ++ IOMMU_ATTR_VFIO_NESTED, + }; + + /** +-- +2.27.0 + diff --git a/memory-Add-new-fields-in-IOTLBEntry.patch b/memory-Add-new-fields-in-IOTLBEntry.patch new file mode 100644 index 0000000000000000000000000000000000000000..d76ff3bcd7321b32c9a57b6862f68b19f1216daa --- /dev/null +++ b/memory-Add-new-fields-in-IOTLBEntry.patch @@ -0,0 +1,84 @@ +From 5a77056573d946eb9220b90dd1edce1f6f925c42 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 4 Sep 2018 08:43:05 -0400 +Subject: [PATCH] memory: Add new fields in IOTLBEntry + +The current IOTLBEntry becomes too simple to interact with +some physical IOMMUs. IOTLBs can be invalidated with different +granularities: domain, pasid, addr. 
Current IOTLB entry only offers +page selective invalidation. Let's add a granularity field +that conveys this information. + +TLB entries are usually tagged with some ids such as the asid +or pasid. When propagating an invalidation command from the +guest to the host, we need to pass those IDs. + +Also we add a leaf field which indicates, in case of invalidation +notification, whether only cache entries for the last level of +translation are required to be invalidated. + +A flag field is introduced to inform whether those fields are set. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + include/exec/memory.h | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index dca8184277..3c5206dce6 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -66,14 +66,48 @@ typedef enum { + IOMMU_RW = 3, + } IOMMUAccessFlags; + ++/* Granularity of the cache invalidation */ ++typedef enum { ++ IOMMU_INV_GRAN_ADDR = 0, ++ IOMMU_INV_GRAN_PASID, ++ IOMMU_INV_GRAN_DOMAIN, ++} IOMMUInvGranularity; ++ + #define IOMMU_ACCESS_FLAG(r, w) (((r) ? IOMMU_RO : 0) | ((w) ? 
IOMMU_WO : 0)) + ++/** ++ * IOMMUTLBEntry - IOMMU TLB entry ++ * ++ * Structure used when performing a translation or when notifying MAP or ++ * UNMAP (invalidation) events ++ * ++ * @target_as: target address space ++ * @iova: IO virtual address (input) ++ * @translated_addr: translated address (output) ++ * @addr_mask: address mask (0xfff means 4K binding), must be multiple of 2 ++ * @perm: permission flag of the mapping (NONE encodes no mapping or ++ * invalidation notification) ++ * @granularity: granularity of the invalidation ++ * @flags: informs whether the following fields are set ++ * @arch_id: architecture specific ID tagging the TLB ++ * @pasid: PASID tagging the TLB ++ * @leaf: when @perm is NONE, indicates whether only caches for the last ++ * level of translation need to be invalidated. ++ */ + struct IOMMUTLBEntry { + AddressSpace *target_as; + hwaddr iova; + hwaddr translated_addr; +- hwaddr addr_mask; /* 0xfff = 4k translation */ ++ hwaddr addr_mask; + IOMMUAccessFlags perm; ++ IOMMUInvGranularity granularity; ++#define IOMMU_INV_FLAGS_PASID (1 << 0) ++#define IOMMU_INV_FLAGS_ARCHID (1 << 1) ++#define IOMMU_INV_FLAGS_LEAF (1 << 2) ++ uint32_t flags; ++ uint32_t arch_id; ++ uint32_t pasid; ++ bool leaf; + }; + + /* +-- +2.27.0 + diff --git a/memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch b/memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch new file mode 100644 index 0000000000000000000000000000000000000000..7cecd31a9765fb0926a4de993b38e0d5e68dfd6b --- /dev/null +++ b/memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch @@ -0,0 +1,89 @@ +From 497e055ed89e3cb5286dde2b05b7d7fd67e69331 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 13 Sep 2018 14:13:04 +0200 +Subject: [PATCH] memory: Introduce IOMMU Memory Region inject_faults API + +This new API allows to inject @count iommu_faults into +the IOMMU memory region. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + include/exec/memory.h | 25 +++++++++++++++++++++++++ + memory.c | 10 ++++++++++ + 2 files changed, 35 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 716b07e115..ffd4282f14 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -56,6 +56,8 @@ struct MemoryRegionMmio { + CPUWriteMemoryFunc *write[3]; + }; + ++struct iommu_fault; ++ + typedef struct IOMMUTLBEntry IOMMUTLBEntry; + + /* See address_space_translate: bit 0 is read, bit 1 is write. */ +@@ -378,6 +380,19 @@ typedef struct IOMMUMemoryRegionClass { + * @iommu: the IOMMUMemoryRegion + */ + int (*num_indexes)(IOMMUMemoryRegion *iommu); ++ ++ /* ++ * Inject @count faults into the IOMMU memory region ++ * ++ * Optional method: if this method is not provided, then ++ * memory_region_inject_faults() will return -ENOENT ++ * ++ * @iommu: the IOMMU memory region to inject the faults in ++ * @count: number of faults to inject ++ * @buf: fault buffer ++ */ ++ int (*inject_faults)(IOMMUMemoryRegion *iommu, int count, ++ struct iommu_fault *buf); + } IOMMUMemoryRegionClass; + + typedef struct CoalescedMemoryRange CoalescedMemoryRange; +@@ -1182,6 +1197,16 @@ int memory_region_iommu_attrs_to_index(IOMMUMemoryRegion *iommu_mr, + */ + int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr); + ++/** ++ * memory_region_inject_faults : inject @count faults stored in @buf ++ * ++ * @iommu_mr: the IOMMU memory region ++ * @count: number of faults to be injected ++ * @buf: buffer containing the faults ++ */ ++int memory_region_inject_faults(IOMMUMemoryRegion *iommu_mr, int count, ++ struct iommu_fault *buf); ++ + /** + * memory_region_name: get a memory region's name + * +diff --git a/memory.c b/memory.c +index 708b3dff3d..623f89baa4 100644 +--- a/memory.c ++++ b/memory.c +@@ -2017,6 +2017,16 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr) + return imrc->num_indexes(iommu_mr); + } +
++int memory_region_inject_faults(IOMMUMemoryRegion *iommu_mr, int count, ++ struct iommu_fault *buf) ++{ ++ IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); ++ if (!imrc->inject_faults) { ++ return -ENOENT; ++ } ++ return imrc->inject_faults(iommu_mr, count, buf); ++} ++ + void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) + { + uint8_t mask = 1 << client; +-- +2.27.0 + diff --git a/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch b/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c4052639c2c104de23c1a6d99674ec036e675af --- /dev/null +++ b/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch @@ -0,0 +1,35 @@ +From 0ae8b3e05294fee99870efa9b58e22e16f31caf9 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:20 +0530 +Subject: [PATCH] memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled + +mr->ram_block is NULL when mr->is_iommu is true, then fr.dirty_log_mask +wasn't set correctly due to which memory listener's log_sync doesn't +get called. +This patch returns log_mask with DIRTY_MEMORY_MIGRATION set when +IOMMU is enabled. 
+ +Signed-off-by: Kirti Wankhede +Reviewed-by: Yan Zhao +Acked-by: Paolo Bonzini +Signed-off-by: Alex Williamson +--- + memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/memory.c b/memory.c +index 5d8c9a9234..44713efc66 100644 +--- a/memory.c ++++ b/memory.c +@@ -1825,7 +1825,7 @@ bool memory_region_is_ram_device(MemoryRegion *mr) + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +- if (global_dirty_log && mr->ram_block) { ++ if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) { + mask |= (1 << DIRTY_MEMORY_MIGRATION); + } + return mask; +-- +2.27.0 + diff --git a/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch b/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a25d177e1349ef0faca9b9280e70bb43dfd2837 --- /dev/null +++ b/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch @@ -0,0 +1,42 @@ +From d0d816682b790b7d8a9caf17c32eadde7756ac9c Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Mon, 16 Nov 2020 21:22:10 +0800 +Subject: [PATCH] memory: Skip dirty tracking for un-migratable memory regions + +It makes no sense to track dirty pages for those un-migratable memory +regions (e.g., Memory BAR region of the VFIO PCI device) and doing so +will potentially lead to some unpleasant issues during migration [1]. + +Skip dirty tracking for those regions by evaluating if the region is +migratable before setting dirty_log_mask (DIRTY_MEMORY_MIGRATION). 
+ +[1] https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg03757.html + +Signed-off-by: Zenghui Yu +Message-Id: <20201116132210.1730-1-yuzenghui@huawei.com> +Reviewed-by: Cornelia Huck +Signed-off-by: Paolo Bonzini +Signed-off-by: Kunkun Jiang +--- + memory.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/memory.c b/memory.c +index 44713efc66..708b3dff3d 100644 +--- a/memory.c ++++ b/memory.c +@@ -1825,7 +1825,10 @@ bool memory_region_is_ram_device(MemoryRegion *mr) + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +- if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) { ++ RAMBlock *rb = mr->ram_block; ++ ++ if (global_dirty_log && ((rb && qemu_ram_is_migratable(rb)) || ++ memory_region_is_iommu(mr))) { + mask |= (1 << DIRTY_MEMORY_MIGRATION); + } + return mask; +-- +2.27.0 + diff --git a/memory-clamp-cached-translation-in-case-it-points-to.patch b/memory-clamp-cached-translation-in-case-it-points-to.patch new file mode 100644 index 0000000000000000000000000000000000000000..c4f74d4e094c937f8b415683dc06997c3ce6cc90 --- /dev/null +++ b/memory-clamp-cached-translation-in-case-it-points-to.patch @@ -0,0 +1,72 @@ +From e07e9fc9d97e9cae3d6316b7286b504398a6fc80 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 13 Jan 2021 14:50:59 +0800 +Subject: [PATCH] memory: clamp cached translation in case it points to an MMIO + region + +In using the address_space_translate_internal API, address_space_cache_init +forgot one piece of advice that can be found in the code for +address_space_translate_internal: + + /* MMIO registers can be expected to perform full-width accesses based only + * on their address, without considering adjacent registers that could + * decode to completely different MemoryRegions. When such registers + * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO + * regions overlap wildly. 
For this reason we cannot clamp the accesses + * here. + * + * If the length is small (as is the case for address_space_ldl/stl), + * everything works fine. If the incoming length is large, however, + * the caller really has to do the clamping through memory_access_size. + */ + +address_space_cache_init is exactly one such case where "the incoming length +is large", therefore we need to clamp the resulting length---not to +memory_access_size though, since we are not doing an access yet, but to +the size of the resulting section. This ensures that subsequent accesses +to the cached MemoryRegionSection will be in range. + +With this patch, the enclosed testcase notices that the used ring does +not fit into the MSI-X table and prints a "qemu-system-x86_64: Cannot map used" +error. + +Signed-off-by: Paolo Bonzini +(cherry-picked from 4bfb024b) +Fix CVE-2020-27821 +Signed-off-by: Alex Chen +--- + exec.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/exec.c b/exec.c +index 85c6d80353..8822c241d8 100644 +--- a/exec.c ++++ b/exec.c +@@ -3834,6 +3834,7 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, + AddressSpaceDispatch *d; + hwaddr l; + MemoryRegion *mr; ++ Int128 diff; + + assert(len > 0); + +@@ -3842,6 +3843,16 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, + d = flatview_to_dispatch(cache->fv); + cache->mrs = *address_space_translate_internal(d, addr, &cache->xlat, &l, true); + ++ /* ++ * cache->xlat is now relative to cache->mrs.mr, not to the section itself. ++ * Take that into account to compute how many bytes are there between ++ * cache->xlat and the end of the section. 
++ */ ++ ++ diff = int128_sub(cache->mrs.size, ++ int128_make64(cache->xlat - cache->mrs.offset_within_region)); ++ l = int128_get64(int128_min(diff, int128_make64(l))); ++ + mr = cache->mrs.mr; + memory_region_ref(mr); + if (memory_access_is_direct(mr, is_write)) { +-- +2.27.0 + diff --git a/memory-unref-the-memory-region-in-simplify-flatview.patch b/memory-unref-the-memory-region-in-simplify-flatview.patch deleted file mode 100644 index eb199646181ccf95dcfef2208a6b5592ff20b6d8..0000000000000000000000000000000000000000 --- a/memory-unref-the-memory-region-in-simplify-flatview.patch +++ /dev/null @@ -1,85 +0,0 @@ -From b9f43f0cca03586a31b53e47ade72e77db01cb4c Mon Sep 17 00:00:00 2001 -From: King Wang -Date: Fri, 12 Jul 2019 14:52:41 +0800 -Subject: [PATCH 2/5] memory: unref the memory region in simplify flatview - -The memory region reference is increased when insert a range -into flatview range array, then decreased by destroy flatview. -If some flat range merged by flatview_simplify, the memory region -reference can not be decreased by destroy flatview any more. 
- -In this case, start virtual machine by the command line: -qemu-system-x86_64 --name guest=ubuntu,debug-threads=on --machine pc,accel=kvm,usb=off,dump-guest-core=off --cpu host --m 16384 --realtime mlock=off --smp 8,sockets=2,cores=4,threads=1 --object memory-backend-file,id=ram-node0,prealloc=yes,mem-path=/dev/hugepages,share=yes,size=8589934592 --numa node,nodeid=0,cpus=0-3,memdev=ram-node0 --object memory-backend-file,id=ram-node1,prealloc=yes,mem-path=/dev/hugepages,share=yes,size=8589934592 --numa node,nodeid=1,cpus=4-7,memdev=ram-node1 --no-user-config --nodefaults --rtc base=utc --no-shutdown --boot strict=on --device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 --device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x2 --device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x3 --drive file=ubuntu.qcow2,format=qcow2,if=none,id=drive-virtio-disk0,cache=none,aio=native --device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 --chardev pty,id=charserial0 --device isa-serial,chardev=charserial0,id=serial0 --device usb-tablet,id=input0,bus=usb.0,port=1 --vnc 0.0.0.0:0 --device VGA,id=video0,vgamem_mb=16,bus=pci.0,addr=0x5 --device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 --msg timestamp=on - -And run the script in guest OS: -while true -do - setpci -s 00:06.0 04.b=03 - setpci -s 00:06.0 04.b=07 -done - -I found the reference of node0 HostMemoryBackendFile is a big one. 
-(gdb) p numa_info[0]->node_memdev->parent.ref -$6 = 1636278 -(gdb) - -Signed-off-by: King Wang -Message-Id: <20190712065241.11784-1-king.wang@huawei.com> -Signed-off-by: Paolo Bonzini ---- - memory.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/memory.c b/memory.c -index 9fbca52..0b49281 100644 ---- a/memory.c -+++ b/memory.c -@@ -320,7 +320,7 @@ static bool can_merge(FlatRange *r1, FlatRange *r2) - /* Attempt to simplify a view by merging adjacent ranges */ - static void flatview_simplify(FlatView *view) - { -- unsigned i, j; -+ unsigned i, j, k; - - i = 0; - while (i < view->nr) { -@@ -331,6 +331,9 @@ static void flatview_simplify(FlatView *view) - ++j; - } - ++i; -+ for (k = i; k < j; k++) { -+ memory_region_unref(view->ranges[k].mr); -+ } - memmove(&view->ranges[i], &view->ranges[j], - (view->nr - j) * sizeof(view->ranges[j])); - view->nr -= j - i; --- -1.8.3.1 - diff --git a/microblaze-fix-leak-of-fdevice-tree-blob.patch b/microblaze-fix-leak-of-fdevice-tree-blob.patch new file mode 100644 index 0000000000000000000000000000000000000000..dd845e80cef5f3315e44417f3b7eeaa60ce6b8bb --- /dev/null +++ b/microblaze-fix-leak-of-fdevice-tree-blob.patch @@ -0,0 +1,32 @@ +From 2ff9c28e2b72cd359a0c4e931412e355baee8e1e Mon Sep 17 00:00:00 2001 +From: lizhengui +Date: Wed, 9 Sep 2020 14:55:11 +0800 +Subject: [PATCH] microblaze: fix leak of fdevice tree blob +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The device tree blob returned by load_device_tree is malloced. +Free it before returning. 
+ +Signed-off-by: Paolo Bonzini +Reviewed-by: Philippe Mathieu-Daudé +--- + hw/microblaze/boot.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c +index a7af4c07..0fcc4e9d 100644 +--- a/hw/microblaze/boot.c ++++ b/hw/microblaze/boot.c +@@ -99,6 +99,7 @@ static int microblaze_load_dtb(hwaddr addr, + } + + cpu_physical_memory_write(addr, fdt, fdt_size); ++ g_free(fdt); + return fdt_size; + } + +-- +2.19.1 + diff --git a/migration-Add-compress_level-sanity-check.patch b/migration-Add-compress_level-sanity-check.patch new file mode 100644 index 0000000000000000000000000000000000000000..8513384f438612de27e6c4508d3f38d22edcd897 --- /dev/null +++ b/migration-Add-compress_level-sanity-check.patch @@ -0,0 +1,67 @@ +From 90c8ce0b3bcf4a3140bc4b500da9b55a694e1bde Mon Sep 17 00:00:00 2001 +From: Zeyu Jin +Date: Sat, 30 Jan 2021 16:23:15 +0800 +Subject: [PATCH] migration: Add compress_level sanity check + +Zlib compression has level from 1 to 9. However Zstd compression has level +from 1 to 22 (level >= 20 not recommended). Let's do sanity check here +to make sure a valid compress_level is given by user.
+ +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/migration.c | 32 ++++++++++++++++++++++++++++---- + 1 file changed, 28 insertions(+), 4 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 67425fde7a..17a5c16c79 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1111,16 +1111,40 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + } + } + ++static bool compress_level_check(MigrationParameters *params, Error **errp) ++{ ++ switch (params->compress_method) { ++ case COMPRESS_METHOD_ZLIB: ++ if (params->compress_level > 9 || params->compress_level < 1) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value in the range of 0 to 9 for Zlib method"); ++ return false; ++ } ++ break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ if (params->compress_level > 19 || params->compress_level < 1) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value in the range of 1 to 19 for Zstd method"); ++ return false; ++ } ++ break; ++#endif ++ default: ++ error_setg(errp, "Checking compress_level failed for unknown reason"); ++ return false; ++ } ++ ++ return true; ++} ++ + /* + * Check whether the parameters are valid. Error will be put into errp + * (if provided). Return true if valid, otherwise false. 
+ */ + static bool migrate_params_check(MigrationParameters *params, Error **errp) + { +- if (params->has_compress_level && +- (params->compress_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", +- "is invalid, it should be in the range of 0 to 9"); ++ if (params->has_compress_level && !compress_level_check(params, errp)) { + return false; + } + +-- +2.27.0 + diff --git a/migration-Add-multi-thread-compress-method.patch b/migration-Add-multi-thread-compress-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..e900a729ea9779862318e0d2c192f88d8cf81c44 --- /dev/null +++ b/migration-Add-multi-thread-compress-method.patch @@ -0,0 +1,365 @@ +From b0cabc67e16d9b4e1e749b0359dd8f3874e0968d Mon Sep 17 00:00:00 2001 +From: Zeyu Jin +Date: Sat, 30 Jan 2021 14:57:54 +0800 +Subject: [PATCH] migration: Add multi-thread compress method + +A multi-thread compress method parameter is added to hold the method we +are going to use. By default the 'zlib' method is used to maintain the +compatibility as before. + +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + hw/core/qdev-prop-internal.h | 18 ++++++++++++++++++ + hw/core/qdev-properties-system.c | 13 +++++++++++++ + hw/core/qdev-properties.c | 14 +++++++++++--- + include/hw/qdev-properties.h | 4 ++++ + migration/migration.c | 15 +++++++++++++++ + migration/qemu-file.c | 9 +++++++++ + monitor/hmp-cmds.c | 13 +++++++++++++ + qapi/migration.json | 26 +++++++++++++++++++++++++- + 8 files changed, 108 insertions(+), 4 deletions(-) + create mode 100644 hw/core/qdev-prop-internal.h + +diff --git a/hw/core/qdev-prop-internal.h b/hw/core/qdev-prop-internal.h +new file mode 100644 +index 0000000000..a4a7eaf078 +--- /dev/null ++++ b/hw/core/qdev-prop-internal.h +@@ -0,0 +1,18 @@ ++/* ++ * qdev property parsing ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++ ++#ifndef HW_CORE_QDEV_PROP_INTERNAL_H ++#define HW_CORE_QDEV_PROP_INTERNAL_H ++ ++void get_enum(Object *obj, Visitor *v, const char *name, void *opaque, ++ Error **errp); ++void set_enum(Object *obj, Visitor *v, const char *name, void *opaque, ++ Error **errp); ++ ++void set_default_value_enum(Object *obj, const Property *prop); ++ ++#endif +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index ba412dd2ca..67ed89b406 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -15,6 +15,7 @@ + #include "hw/qdev.h" + #include "qapi/error.h" + #include "qapi/qmp/qerror.h" ++#include "qapi/qapi-types-migration.h" + #include "sysemu/block-backend.h" + #include "sysemu/blockdev.h" + #include "hw/block/block.h" +@@ -23,6 +24,7 @@ + #include "chardev/char-fe.h" + #include "sysemu/iothread.h" + #include "sysemu/tpm_backend.h" ++#include "qdev-prop-internal.h" + + static void get_pointer(Object *obj, Visitor *v, Property *prop, + char *(*print)(void *ptr), +@@ -399,3 +401,14 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd) + } + nd->instantiated = 1; + } ++ ++/* --- CompressMethod --- */ ++const PropertyInfo qdev_prop_compress_method = { ++ .name = "CompressMethod", ++ .description = "multi-thread compression method, " ++ "zlib", ++ .enum_table = &CompressMethod_lookup, ++ .get = get_enum, ++ .set = set_enum, ++ .set_default_value = set_default_value_enum, ++}; +diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c +index 81c97f48a7..709f9e0f9d 100644 +--- a/hw/core/qdev-properties.c ++++ b/hw/core/qdev-properties.c +@@ -11,6 +11,7 @@ + #include "qapi/visitor.h" + #include "chardev/char.h" + #include "qemu/uuid.h" ++#include "qdev-prop-internal.h" + + void qdev_prop_set_after_realize(DeviceState *dev, const char *name, + Error **errp) +@@ -46,7 +47,7 @@ void *qdev_get_prop_ptr(DeviceState *dev, Property *prop) + return ptr; + } + +-static void get_enum(Object *obj, Visitor 
*v, const char *name, void *opaque, ++void get_enum(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) + { + DeviceState *dev = DEVICE(obj); +@@ -56,7 +57,7 @@ static void get_enum(Object *obj, Visitor *v, const char *name, void *opaque, + visit_type_enum(v, prop->name, ptr, prop->info->enum_table, errp); + } + +-static void set_enum(Object *obj, Visitor *v, const char *name, void *opaque, ++void set_enum(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) + { + DeviceState *dev = DEVICE(obj); +@@ -71,7 +72,7 @@ static void set_enum(Object *obj, Visitor *v, const char *name, void *opaque, + visit_type_enum(v, prop->name, ptr, prop->info->enum_table, errp); + } + +-static void set_default_value_enum(Object *obj, const Property *prop) ++void set_default_value_enum(Object *obj, const Property *prop) + { + object_property_set_str(obj, + qapi_enum_lookup(prop->info->enum_table, +@@ -79,6 +80,13 @@ static void set_default_value_enum(Object *obj, const Property *prop) + prop->name, &error_abort); + } + ++const PropertyInfo qdev_prop_enum = { ++ .name = "enum", ++ .get = get_enum, ++ .set = set_enum, ++ .set_default_value = set_default_value_enum, ++}; ++ + /* Bit */ + + static uint32_t qdev_get_prop_mask(Property *prop) +diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h +index 1eae5ab056..a22a532eb8 100644 +--- a/include/hw/qdev-properties.h ++++ b/include/hw/qdev-properties.h +@@ -23,6 +23,7 @@ extern const PropertyInfo qdev_prop_tpm; + extern const PropertyInfo qdev_prop_ptr; + extern const PropertyInfo qdev_prop_macaddr; + extern const PropertyInfo qdev_prop_on_off_auto; ++extern const PropertyInfo qdev_prop_compress_method; + extern const PropertyInfo qdev_prop_losttickpolicy; + extern const PropertyInfo qdev_prop_blockdev_on_error; + extern const PropertyInfo qdev_prop_bios_chs_trans; +@@ -205,6 +206,9 @@ extern const PropertyInfo qdev_prop_pcie_link_width; + DEFINE_PROP(_n, _s, _f, qdev_prop_macaddr, 
MACAddr) + #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_on_off_auto, OnOffAuto) ++#define DEFINE_PROP_COMPRESS_METHOD(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_compress_method, \ ++ CompressMethod) + #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \ + LostTickPolicy) +diff --git a/migration/migration.c b/migration/migration.c +index 0e396f22b4..c79bf09269 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -71,6 +71,7 @@ + #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 + /*0: means nocompress, 1: best speed, ... 9: best compress ratio */ + #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 ++#define DEFAULT_MIGRATE_COMPRESS_METHOD COMPRESS_METHOD_ZLIB + /* Define default autoconverge cpu throttle migration parameters */ + #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 + #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 +@@ -748,6 +749,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->compress_wait_thread = s->parameters.compress_wait_thread; + params->has_decompress_threads = true; + params->decompress_threads = s->parameters.decompress_threads; ++ params->has_compress_method = true; ++ params->compress_method = s->parameters.compress_method; + params->has_cpu_throttle_initial = true; + params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; + params->has_cpu_throttle_increment = true; +@@ -1250,6 +1253,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + dest->decompress_threads = params->decompress_threads; + } + ++ if (params->has_compress_method) { ++ dest->compress_method = params->compress_method; ++ } ++ + if (params->has_cpu_throttle_initial) { + dest->cpu_throttle_initial = params->cpu_throttle_initial; + } +@@ -1331,6 +1338,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + s->parameters.decompress_threads = 
params->decompress_threads; + } + ++ if (params->has_compress_method) { ++ s->parameters.compress_method = params->compress_method; ++ } ++ + if (params->has_cpu_throttle_initial) { + s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; + } +@@ -3436,6 +3447,9 @@ static Property migration_properties[] = { + DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, + parameters.decompress_threads, + DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), ++ DEFINE_PROP_COMPRESS_METHOD("compress-method", MigrationState, ++ parameters.compress_method, ++ DEFAULT_MIGRATE_COMPRESS_METHOD), + DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, + parameters.cpu_throttle_initial, + DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), +@@ -3535,6 +3549,7 @@ static void migration_instance_init(Object *obj) + params->has_compress_level = true; + params->has_compress_threads = true; + params->has_decompress_threads = true; ++ params->has_compress_method = true; + params->has_cpu_throttle_initial = true; + params->has_cpu_throttle_increment = true; + params->has_max_bandwidth = true; +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index cd96d04e9a..be0d6c8ca8 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -382,6 +382,15 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size, + } + } + ++static void add_buf_to_iovec(QEMUFile *f, size_t len) ++{ ++ add_to_iovec(f, f->buf + f->buf_index, len, false); ++ f->buf_index += len; ++ if (f->buf_index == IO_BUF_SIZE) { ++ qemu_fflush(f); ++ } ++} ++ + void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, + bool may_free) + { +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index fc5d6b92c4..e5a7a88ba2 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -41,6 +41,7 @@ + #include "qapi/qapi-commands-tpm.h" + #include "qapi/qapi-commands-ui.h" + #include "qapi/qapi-visit-net.h" ++#include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qdict.h" + #include 
"qapi/qmp/qerror.h" + #include "qapi/string-input-visitor.h" +@@ -426,6 +427,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS), + params->decompress_threads); + assert(params->has_cpu_throttle_initial); ++ monitor_printf(mon, "%s: %s\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_METHOD), ++ CompressMethod_str(params->compress_method)); + monitor_printf(mon, "%s: %u\n", + MigrationParameter_str(MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL), + params->cpu_throttle_initial); +@@ -1756,6 +1760,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + MigrateSetParameters *p = g_new0(MigrateSetParameters, 1); + uint64_t valuebw = 0; + uint64_t cache_size; ++ CompressMethod compress_method; + Error *err = NULL; + int val, ret; + +@@ -1781,6 +1786,14 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_decompress_threads = true; + visit_type_int(v, param, &p->decompress_threads, &err); + break; ++ case MIGRATION_PARAMETER_COMPRESS_METHOD: ++ p->has_compress_method = true; ++ visit_type_CompressMethod(v, param, &compress_method, &err); ++ if (err) { ++ break; ++ } ++ p->compress_method = compress_method; ++ break; + case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL: + p->has_cpu_throttle_initial = true; + visit_type_int(v, param, &p->cpu_throttle_initial, &err); +diff --git a/qapi/migration.json b/qapi/migration.json +index 6844ddfab3..b0e8c493ee 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -482,6 +482,19 @@ + ## + { 'command': 'query-migrate-capabilities', 'returns': ['MigrationCapabilityStatus']} + ++## ++# @CompressMethod: ++# ++# An enumeration of multi-thread compression methods. ++# ++# @zlib: use zlib compression method. 
++# ++# Since: 5.0 ++# ++## ++{ 'enum': 'CompressMethod', ++ 'data': [ 'zlib' ] } ++ + ## + # @MigrationParameter: + # +@@ -518,6 +531,9 @@ + # compression, so set the decompress-threads to the number about 1/4 + # of compress-threads is adequate. + # ++# @compress-method: Which multi-thread compression method to use. ++# Defaults to zlib. (Since 5.0) ++# + # @cpu-throttle-initial: Initial percentage of time guest cpus are throttled + # when migration auto-converge is activated. The + # default value is 20. (Since 2.7) +@@ -586,7 +602,7 @@ + 'data': ['announce-initial', 'announce-max', + 'announce-rounds', 'announce-step', + 'compress-level', 'compress-threads', 'decompress-threads', +- 'compress-wait-thread', ++ 'compress-wait-thread', 'compress-method', + 'cpu-throttle-initial', 'cpu-throttle-increment', + 'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth', + 'downtime-limit', 'x-checkpoint-delay', 'block-incremental', +@@ -620,6 +636,9 @@ + # + # @decompress-threads: decompression thread count + # ++# @compress-method: Set compression method to use in multi-thread compression. ++# Defaults to zlib. (Since 5.0) ++# + # @cpu-throttle-initial: Initial percentage of time guest cpus are + # throttled when migration auto-converge is activated. + # The default value is 20. (Since 2.7) +@@ -695,6 +714,7 @@ + '*compress-threads': 'int', + '*compress-wait-thread': 'bool', + '*decompress-threads': 'int', ++ '*compress-method': 'CompressMethod', + '*cpu-throttle-initial': 'int', + '*cpu-throttle-increment': 'int', + '*tls-creds': 'StrOrNull', +@@ -753,6 +773,9 @@ + # + # @decompress-threads: decompression thread count + # ++# @compress-method: Which multi-thread compression method to use. ++# Defaults to zlib. (Since 5.0) ++# + # @cpu-throttle-initial: Initial percentage of time guest cpus are + # throttled when migration auto-converge is activated. 
+ # (Since 2.7) +@@ -828,6 +851,7 @@ + '*compress-threads': 'uint8', + '*compress-wait-thread': 'bool', + '*decompress-threads': 'uint8', ++ '*compress-method': 'CompressMethod', + '*cpu-throttle-initial': 'uint8', + '*cpu-throttle-increment': 'uint8', + '*tls-creds': 'str', +-- +2.27.0 + diff --git a/migration-Add-multi-thread-compress-ops.patch b/migration-Add-multi-thread-compress-ops.patch new file mode 100644 index 0000000000000000000000000000000000000000..043d9f9b3379a4664e01b9153ccc7ecf3a702c6d --- /dev/null +++ b/migration-Add-multi-thread-compress-ops.patch @@ -0,0 +1,442 @@ +From 99fddf2ffeefc99ab15b3428dbd2b46476be3e7e Mon Sep 17 00:00:00 2001 +From: Zeyu Jin +Date: Sat, 30 Jan 2021 15:57:31 +0800 +Subject: [PATCH] migration: Add multi-thread compress ops + +Add the MigrationCompressOps and MigrationDecompressOps structures to make +the compression method configurable for multi-thread compression migration. + +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/migration.c | 9 ++ + migration/migration.h | 1 + + migration/ram.c | 269 ++++++++++++++++++++++++++++++------------ + 3 files changed, 201 insertions(+), 78 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c79bf09269..67425fde7a 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2143,6 +2143,15 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + ++CompressMethod migrate_compress_method(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_method; ++} ++ + bool migrate_dirty_bitmaps(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index f2bd4ebe33..4aa72297fc 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -319,6 +319,7 @@ int migrate_compress_level(void); + int migrate_compress_threads(void); + int migrate_compress_wait_thread(void); + int migrate_decompress_threads(void); 
++CompressMethod migrate_compress_method(void); + bool migrate_use_events(void); + bool migrate_postcopy_blocktime(void); + +diff --git a/migration/ram.c b/migration/ram.c +index f78a681ca2..3ed808a4ca 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -417,6 +417,9 @@ struct CompressParam { + /* internally used fields */ + z_stream stream; + uint8_t *originbuf; ++ ++ /* for zlib compression */ ++ z_stream stream; + }; + typedef struct CompressParam CompressParam; + +@@ -428,12 +431,29 @@ struct DecompressParam { + void *des; + uint8_t *compbuf; + int len; ++ ++ /* for zlib compression */ + z_stream stream; + }; + typedef struct DecompressParam DecompressParam; + ++typedef struct { ++ int (*save_setup)(CompressParam *param); ++ void (*save_cleanup)(CompressParam *param); ++ ssize_t (*compress_data)(CompressParam *param, size_t size); ++} MigrationCompressOps; ++ ++typedef struct { ++ int (*load_setup)(DecompressParam *param); ++ void (*load_cleanup)(DecompressParam *param); ++ int (*decompress_data)(DecompressParam *param, uint8_t *dest, size_t size); ++ int (*check_len)(int len); ++} MigrationDecompressOps; ++ + static CompressParam *comp_param; + static QemuThread *compress_threads; ++static MigrationCompressOps *compress_ops; ++static MigrationDecompressOps *decompress_ops; + /* comp_done_cond is used to wake up the migration thread when + * one of the compression threads has finished the compression. + * comp_done_lock is used to co-work with comp_done_cond. 
+@@ -451,6 +471,157 @@ static QemuCond decomp_done_cond; + + static bool do_compress_ram_page(CompressParam *param, RAMBlock *block); + ++static int zlib_save_setup(CompressParam *param) ++{ ++ if (deflateInit(&param->stream, ++ migrate_compress_level()) != Z_OK) { ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static ssize_t zlib_compress_data(CompressParam *param, size_t size) ++ ++ int err; ++ uint8_t *dest = NULL; ++ z_stream *stream = &param->stream; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ ++ if (blen < compressBound(size)) { ++ return -1; ++ } ++ ++ err = deflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = size; ++ stream->next_in = p; ++ stream->avail_out = blen; ++ stream->next_out = dest; ++ ++ err = deflate(stream, Z_FINISH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ blen = stream->next_out - dest; ++ if (blen < 0) { ++ return -1; ++ } ++ ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static void zlib_save_cleanup(CompressParam *param) ++{ ++ deflateEnd(&param->stream); ++} ++ ++static int zlib_load_setup(DecompressParam *param) ++{ ++ if (inflateInit(&param->stream) != Z_OK) { ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int ++zlib_decompress_data(DecompressParam *param, uint8_t *dest, size_t size) ++{ ++ int err; ++ ++ z_stream *stream = &param->stream; ++ ++ err = inflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = param->len; ++ stream->next_in = param->compbuf; ++ stream->avail_out = size; ++ stream->next_out = dest; ++ ++ err = inflate(stream, Z_NO_FLUSH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ return stream->total_out; ++} ++ ++static void zlib_load_cleanup(DecompressParam *param) ++{ ++ inflateEnd(&param->stream); ++} ++ ++static int zlib_check_len(int len) ++{ ++ return len < 0 || len > compressBound(TARGET_PAGE_SIZE); ++} ++ ++static int 
set_compress_ops(void) ++{ ++ compress_ops = g_new0(MigrationCompressOps, 1); ++ ++ switch (migrate_compress_method()) { ++ case COMPRESS_METHOD_ZLIB: ++ compress_ops->save_setup = zlib_save_setup; ++ compress_ops->save_cleanup = zlib_save_cleanup; ++ compress_ops->compress_data = zlib_compress_data; ++ break; ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int set_decompress_ops(void) ++{ ++ decompress_ops = g_new0(MigrationDecompressOps, 1); ++ ++ switch (migrate_compress_method()) { ++ case COMPRESS_METHOD_ZLIB: ++ decompress_ops->load_setup = zlib_load_setup; ++ decompress_ops->load_cleanup = zlib_load_cleanup; ++ decompress_ops->decompress_data = zlib_decompress_data; ++ decompress_ops->check_len = zlib_check_len; ++ break; ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static void clean_compress_ops(void) ++{ ++ compress_ops->save_setup = NULL; ++ compress_ops->save_cleanup = NULL; ++ compress_ops->compress_data = NULL; ++ ++ g_free(compress_ops); ++ compress_ops = NULL; ++} ++ ++static void clean_decompress_ops(void) ++{ ++ decompress_ops->load_setup = NULL; ++ decompress_ops->load_cleanup = NULL; ++ decompress_ops->decompress_data = NULL; ++ ++ g_free(decompress_ops); ++ decompress_ops = NULL; ++} ++ + static void *do_data_compress(void *opaque) + { + CompressParam *param = opaque; +@@ -508,7 +679,7 @@ static void compress_threads_save_cleanup(void) + qemu_thread_join(compress_threads + i); + qemu_mutex_destroy(&comp_param[i].mutex); + qemu_cond_destroy(&comp_param[i].cond); +- deflateEnd(&comp_param[i].stream); ++ compress_ops->save_cleanup(&comp_param[i]); + g_free(comp_param[i].originbuf); + qemu_fclose(comp_param[i].file); + comp_param[i].file = NULL; +@@ -519,6 +690,7 @@ static void compress_threads_save_cleanup(void) + g_free(comp_param); + compress_threads = NULL; + comp_param = NULL; ++ clean_compress_ops(); + } + + static int compress_threads_save_setup(void) +@@ -528,6 +700,12 @@ static int compress_threads_save_setup(void) 
+ if (!migrate_use_compression()) { + return 0; + } ++ ++ if (set_compress_ops() < 0) { ++ clean_compress_ops(); ++ return -1; ++ } ++ + thread_count = migrate_compress_threads(); + compress_threads = g_new0(QemuThread, thread_count); + comp_param = g_new0(CompressParam, thread_count); +@@ -539,8 +717,7 @@ static int compress_threads_save_setup(void) + goto exit; + } + +- if (deflateInit(&comp_param[i].stream, +- migrate_compress_level()) != Z_OK) { ++ if (compress_ops->save_setup(&comp_param[i]) < 0) { + g_free(comp_param[i].originbuf); + goto exit; + } +@@ -2208,50 +2385,6 @@ static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, + return 1; + } + +-/* +- * Compress size bytes of data start at p and store the compressed +- * data to the buffer of f. +- * +- * Since the file is dummy file with empty_ops, return -1 if f has no space to +- * save the compressed data. +- */ +-static ssize_t qemu_put_compression_data(CompressParam *param, size_t size) +-{ +- int err; +- uint8_t *dest = NULL; +- z_stream *stream = &param->stream; +- uint8_t *p = param->originbuf; +- QEMUFile *f = f = param->file; +- ssize_t blen = qemu_put_compress_start(f, &dest); +- +- if (blen < compressBound(size)) { +- return -1; +- } +- +- err = deflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = size; +- stream->next_in = p; +- stream->avail_out = blen; +- stream->next_out = dest; +- +- err = deflate(stream, Z_FINISH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- blen = stream->next_out - dest; +- if (blen < 0) { +- return -1; +- } +- +- qemu_put_compress_end(f, blen); +- return blen + sizeof(int32_t); +-} +- + static bool do_compress_ram_page(CompressParam *param, RAMBlock *block) + { + RAMState *rs = ram_state; +@@ -2274,7 +2407,7 @@ static bool do_compress_ram_page(CompressParam *param, RAMBlock *block) + * decompression + */ + memcpy(param->originbuf, p, TARGET_PAGE_SIZE); +- ret = qemu_put_compression_data(param, TARGET_PAGE_SIZE); ++ ret = 
compress_ops->compress_data(param, TARGET_PAGE_SIZE); + if (ret < 0) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + error_report("compressed data failed!"); +@@ -3965,32 +4098,6 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) + } + } + +-/* return the size after decompression, or negative value on error */ +-static int +-qemu_uncompress_data(DecompressParam *param, uint8_t *dest, size_t pagesize) +-{ +- int err; +- +- z_stream *stream = &param->stream; +- +- err = inflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = param->len; +- stream->next_in = param->compbuf; +- stream->avail_out = pagesize; +- stream->next_out = dest; +- +- err = inflate(stream, Z_NO_FLUSH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- return stream->total_out; +-} +- + static void *do_data_decompress(void *opaque) + { + DecompressParam *param = opaque; +@@ -4004,7 +4111,7 @@ static void *do_data_decompress(void *opaque) + param->des = 0; + qemu_mutex_unlock(&param->mutex); + +- ret = qemu_uncompress_data(param, des, TARGET_PAGE_SIZE); ++ ret = decompress_ops->decompress_data(param, des, TARGET_PAGE_SIZE); + if (ret < 0 && migrate_get_current()->decompress_error_check) { + error_report("decompress data failed"); + qemu_file_set_error(decomp_file, ret); +@@ -4074,7 +4181,7 @@ static void compress_threads_load_cleanup(void) + qemu_thread_join(decompress_threads + i); + qemu_mutex_destroy(&decomp_param[i].mutex); + qemu_cond_destroy(&decomp_param[i].cond); +- inflateEnd(&decomp_param[i].stream); ++ decompress_ops->load_cleanup(&decomp_param[i]); + g_free(decomp_param[i].compbuf); + decomp_param[i].compbuf = NULL; + } +@@ -4083,6 +4190,7 @@ static void compress_threads_load_cleanup(void) + decompress_threads = NULL; + decomp_param = NULL; + decomp_file = NULL; ++ clean_decompress_ops(); + } + + static int compress_threads_load_setup(QEMUFile *f) +@@ -4093,6 +4201,11 @@ static int compress_threads_load_setup(QEMUFile 
*f) + return 0; + } + ++ if (set_decompress_ops() < 0) { ++ clean_decompress_ops(); ++ return -1; ++ } ++ + thread_count = migrate_decompress_threads(); + decompress_threads = g_new0(QemuThread, thread_count); + decomp_param = g_new0(DecompressParam, thread_count); +@@ -4100,7 +4213,7 @@ static int compress_threads_load_setup(QEMUFile *f) + qemu_cond_init(&decomp_done_cond); + decomp_file = f; + for (i = 0; i < thread_count; i++) { +- if (inflateInit(&decomp_param[i].stream) != Z_OK) { ++ if (decompress_ops->load_setup(&decomp_param[i]) < 0) { + goto exit; + } + +@@ -4642,7 +4755,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) + + case RAM_SAVE_FLAG_COMPRESS_PAGE: + len = qemu_get_be32(f); +- if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { ++ if (decompress_ops->check_len(len)) { + error_report("Invalid compressed data length: %d", len); + ret = -EINVAL; + break; +-- +2.27.0 + diff --git a/migration-Add-zstd-support-in-multi-thread-compressi.patch b/migration-Add-zstd-support-in-multi-thread-compressi.patch new file mode 100644 index 0000000000000000000000000000000000000000..a84bb368ebe5a81e223c805c151ea4896ccc4e51 --- /dev/null +++ b/migration-Add-zstd-support-in-multi-thread-compressi.patch @@ -0,0 +1,231 @@ +From 54a1b546e0bd0cc41669bf7ade806c6c777c96ad Mon Sep 17 00:00:00 2001 +From: Zeyu Jin +Date: Sat, 30 Jan 2021 16:15:10 +0800 +Subject: [PATCH] migration: Add zstd support in multi-thread compression + +This patch enables zstd option in multi-thread compression. 
+ +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + hw/core/qdev-properties-system.c | 2 +- + migration/ram.c | 130 ++++++++++++++++++++++++++++++- + qapi/migration.json | 2 +- + 3 files changed, 130 insertions(+), 4 deletions(-) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 67ed89b406..6d48903c87 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -406,7 +406,7 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd) + const PropertyInfo qdev_prop_compress_method = { + .name = "CompressMethod", + .description = "multi-thread compression method, " +- "zlib", ++ "zlib/zstd", + .enum_table = &CompressMethod_lookup, + .get = get_enum, + .set = set_enum, +diff --git a/migration/ram.c b/migration/ram.c +index 3ed808a4ca..ba1e729c39 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -59,6 +59,10 @@ + #include "savevm.h" + #include "qemu/iov.h" + ++#ifdef CONFIG_ZSTD ++#include <zstd.h> ++#include <zstd_errors.h> ++#endif + /***********************************************************/ + /* ram save/restore */ + +@@ -415,11 +419,16 @@ struct CompressParam { + ram_addr_t offset; + + /* internally used fields */ +- z_stream stream; + uint8_t *originbuf; + + /* for zlib compression */ + z_stream stream; ++ ++#ifdef CONFIG_ZSTD ++ ZSTD_CStream *zstd_cs; ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++#endif + }; + typedef struct CompressParam CompressParam; + +@@ -434,6 +443,11 @@ struct DecompressParam { + + /* for zlib compression */ + z_stream stream; ++#ifdef CONFIG_ZSTD ++ ZSTD_DStream *zstd_ds; ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++#endif + }; + typedef struct DecompressParam DecompressParam; + +@@ -482,7 +496,7 @@ static int zlib_save_setup(CompressParam *param) + } + + static ssize_t zlib_compress_data(CompressParam *param, size_t size) +- ++{ + int err; + uint8_t *dest = NULL; + z_stream *stream = &param->stream; +@@ -567,6 +581,103 @@ static int zlib_check_len(int len) + return len < 0 
|| len > compressBound(TARGET_PAGE_SIZE); + } + ++#ifdef CONFIG_ZSTD ++static int zstd_save_setup(CompressParam *param) ++{ ++ int res; ++ param->zstd_cs = ZSTD_createCStream(); ++ if (!param->zstd_cs) { ++ return -1; ++ } ++ res = ZSTD_initCStream(param->zstd_cs, migrate_compress_level()); ++ if (ZSTD_isError(res)) { ++ return -1; ++ } ++ return 0; ++} ++static void zstd_save_cleanup(CompressParam *param) ++{ ++ ZSTD_freeCStream(param->zstd_cs); ++ param->zstd_cs = NULL; ++} ++static ssize_t zstd_compress_data(CompressParam *param, size_t size) ++{ ++ int ret; ++ uint8_t *dest = NULL; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ if (blen < ZSTD_compressBound(size)) { ++ return -1; ++ } ++ param->out.dst = dest; ++ param->out.size = blen; ++ param->out.pos = 0; ++ param->in.src = p; ++ param->in.size = size; ++ param->in.pos = 0; ++ do { ++ ret = ZSTD_compressStream2(param->zstd_cs, &param->out, ++ &param->in, ZSTD_e_end); ++ } while (ret > 0 && (param->in.size - param->in.pos > 0) ++ && (param->out.size - param->out.pos > 0)); ++ if (ret > 0 && (param->in.size - param->in.pos > 0)) { ++ return -1; ++ } ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ blen = param->out.pos; ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static int zstd_load_setup(DecompressParam *param) ++{ ++ int ret; ++ param->zstd_ds = ZSTD_createDStream(); ++ if (!param->zstd_ds) { ++ return -1; ++ } ++ ret = ZSTD_initDStream(param->zstd_ds); ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ return 0; ++} ++static void zstd_load_cleanup(DecompressParam *param) ++{ ++ ZSTD_freeDStream(param->zstd_ds); ++ param->zstd_ds = NULL; ++} ++static int ++zstd_decompress_data(DecompressParam *param, uint8_t *dest, size_t size) ++{ ++ int ret; ++ param->out.dst = dest; ++ param->out.size = size; ++ param->out.pos = 0; ++ param->in.src = param->compbuf; ++ param->in.size = param->len; ++ param->in.pos = 0; 
++ do { ++ ret = ZSTD_decompressStream(param->zstd_ds, &param->out, &param->in); ++ } while (ret > 0 && (param->in.size - param->in.pos > 0) ++ && (param->out.size - param->out.pos > 0)); ++ if (ret > 0 && (param->in.size - param->in.pos > 0)) { ++ return -1; ++ } ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ return ret; ++} ++static int zstd_check_len(int len) ++{ ++ return len < 0 || len > ZSTD_compressBound(TARGET_PAGE_SIZE); ++} ++#endif ++ + static int set_compress_ops(void) + { + compress_ops = g_new0(MigrationCompressOps, 1); +@@ -577,6 +688,13 @@ static int set_compress_ops(void) + compress_ops->save_cleanup = zlib_save_cleanup; + compress_ops->compress_data = zlib_compress_data; + break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ compress_ops->save_setup = zstd_save_setup; ++ compress_ops->save_cleanup = zstd_save_cleanup; ++ compress_ops->compress_data = zstd_compress_data; ++ break; ++#endif + default: + return -1; + } +@@ -595,6 +713,14 @@ static int set_decompress_ops(void) + decompress_ops->decompress_data = zlib_decompress_data; + decompress_ops->check_len = zlib_check_len; + break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ decompress_ops->load_setup = zstd_load_setup; ++ decompress_ops->load_cleanup = zstd_load_cleanup; ++ decompress_ops->decompress_data = zstd_decompress_data; ++ decompress_ops->check_len = zstd_check_len; ++ break; ++#endif + default: + return -1; + } +diff --git a/qapi/migration.json b/qapi/migration.json +index b0e8c493ee..587ef65872 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -493,7 +493,7 @@ + # + ## + { 'enum': 'CompressMethod', +- 'data': [ 'zlib' ] } ++ 'data': [ 'zlib', { 'name': 'zstd', 'if': 'defined(CONFIG_ZSTD)' } ] } + + ## + # @MigrationParameter: +-- +2.27.0 + diff --git a/migration-Change-SaveStateEntry.instance_id-into-uin.patch b/migration-Change-SaveStateEntry.instance_id-into-uin.patch new file mode 100644 index 
0000000000000000000000000000000000000000..3eb83b3996ccd7b934d7ca5c65800ead9c0ae3bd --- /dev/null +++ b/migration-Change-SaveStateEntry.instance_id-into-uin.patch @@ -0,0 +1,158 @@ +From 2eadc5c611ca8cc916f74c0f393f1fd942903ef7 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 16 Oct 2019 10:29:31 +0800 +Subject: [PATCH 6/8] migration: Change SaveStateEntry.instance_id into + uint32_t + +It was always used as 32bit, so define it as used to be clear. +Instead of using -1 as the auto-gen magic value, we switch to +UINT32_MAX. We also make sure that we don't auto-gen this value to +avoid overflowed instance IDs without being noticed. + +Suggested-by: Juan Quintela +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + hw/intc/apic_common.c | 2 +- + include/migration/register.h | 2 +- + include/migration/vmstate.h | 2 +- + migration/savevm.c | 18 ++++++++++-------- + stubs/vmstate.c | 2 +- + 5 files changed, 14 insertions(+), 12 deletions(-) + +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index faea1af..07adba0 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -313,7 +313,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + APICCommonState *s = APIC_COMMON(dev); + APICCommonClass *info; + static DeviceState *vapic; +- int instance_id = s->id; ++ uint32_t instance_id = s->id; + + info = APIC_COMMON_GET_CLASS(s); + info->realize(dev, errp); +diff --git a/include/migration/register.h b/include/migration/register.h +index 3d0b983..8b2bc5b 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -70,7 +70,7 @@ typedef struct SaveVMHandlers { + + int register_savevm_live(DeviceState *dev, + const char *idstr, +- int instance_id, ++ uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque); +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index 92f531a..8abd2e3 100644 +--- a/include/migration/vmstate.h ++++ 
b/include/migration/vmstate.h +@@ -1117,7 +1117,7 @@ bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + #define VMSTATE_INSTANCE_ID_ANY -1 + + /* Returns: 0 on success, -1 on failure */ +-int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, ++int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, +diff --git a/migration/savevm.c b/migration/savevm.c +index 62552ab..7d89c57 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -229,7 +229,7 @@ typedef struct CompatEntry { + typedef struct SaveStateEntry { + QTAILQ_ENTRY(SaveStateEntry) entry; + char idstr[256]; +- int instance_id; ++ uint32_t instance_id; + int alias_id; + int version_id; + /* version id read from the stream */ +@@ -616,10 +616,10 @@ void dump_vmstate_json_to_file(FILE *out_file) + fclose(out_file); + } + +-static int calculate_new_instance_id(const char *idstr) ++static uint32_t calculate_new_instance_id(const char *idstr) + { + SaveStateEntry *se; +- int instance_id = 0; ++ uint32_t instance_id = 0; + + QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { + if (strcmp(idstr, se->idstr) == 0 +@@ -627,6 +627,8 @@ static int calculate_new_instance_id(const char *idstr) + instance_id = se->instance_id + 1; + } + } ++ /* Make sure we never loop over without being noticed */ ++ assert(instance_id != VMSTATE_INSTANCE_ID_ANY); + return instance_id; + } + +@@ -682,7 +684,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) + distinguishing id for all instances of your device class. 
*/ + int register_savevm_live(DeviceState *dev, + const char *idstr, +- int instance_id, ++ uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, + void *opaque) +@@ -756,7 +758,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque) + } + } + +-int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, ++int vmstate_register_with_alias_id(DeviceState *dev, uint32_t instance_id, + const VMStateDescription *vmsd, + void *opaque, int alias_id, + int required_for_version, +@@ -1507,7 +1509,7 @@ int qemu_save_device_state(QEMUFile *f) + return qemu_file_get_error(f); + } + +-static SaveStateEntry *find_se(const char *idstr, int instance_id) ++static SaveStateEntry *find_se(const char *idstr, uint32_t instance_id) + { + SaveStateEntry *se; + +@@ -2187,7 +2189,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) + /* Find savevm section */ + se = find_se(idstr, instance_id); + if (se == NULL) { +- error_report("Unknown savevm section or instance '%s' %d. " ++ error_report("Unknown savevm section or instance '%s' %"PRIu32". 
" + "Make sure that your current VM setup matches your " + "saved VM setup, including any hotplugged devices", + idstr, instance_id); +@@ -2211,7 +2213,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis) + + ret = vmstate_load(f, se); + if (ret < 0) { +- error_report("error while loading state for instance 0x%x of" ++ error_report("error while loading state for instance 0x%"PRIx32" of" + " device '%s'", instance_id, idstr); + return ret; + } +diff --git a/stubs/vmstate.c b/stubs/vmstate.c +index e1e89b8..4ed5cc6 100644 +--- a/stubs/vmstate.c ++++ b/stubs/vmstate.c +@@ -4,7 +4,7 @@ + const VMStateDescription vmstate_dummy = {}; + + int vmstate_register_with_alias_id(DeviceState *dev, +- int instance_id, ++ uint32_t instance_id, + const VMStateDescription *vmsd, + void *base, int alias_id, + int required_for_version, +-- +1.8.3.1 + diff --git a/migration-Compat-virtual-timer-adjust-for-v4.0.1-and.patch b/migration-Compat-virtual-timer-adjust-for-v4.0.1-and.patch new file mode 100644 index 0000000000000000000000000000000000000000..79548949d7f449db1c57df2b747e347d7b220db5 --- /dev/null +++ b/migration-Compat-virtual-timer-adjust-for-v4.0.1-and.patch @@ -0,0 +1,80 @@ +From 79d722679731233ccb1aa775d896a4bf21e13d44 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 27 May 2020 10:02:06 +0800 +Subject: [PATCH] migration: Compat virtual timer adjust for v4.0.1 and v4.1.0 + +Vtimer adjust is used in openEuler qemu-4.0.1, however kvm_adjvtime +is introduced in openEuler qemu-4.1.0. To maintain the compatibility +and enable cross version migration, let's enable vtimer adjust only +if kvm_adjvtime is not enabled, otherwise there may be conflicts +between vtimer adjust and kvm_adjvtime. 
+ +After this modification: +1: openEuler qemu-4.0.1 uses vtimer as the default virtual timer +2: openEuler qemu-4.1.0 uses kvm_adjvtime as the default virtual timer + +Migration from openEuler qemu-4.0.1 to openEuler qemu-4.1.0 will +be ok, but migration path from upstream qemu-4.0.1 to openEuler +qemu-4.0.1 will be broken. + +Since openEuler qemu-4.1.0, kvm_adjvtime is used as the default +virtual timer. So please upgrade to openEuler qemu-4.1.0 and +use the virt-4.1 machine. + +Signed-off-by: Ying Fang + +diff --git a/cpus.c b/cpus.c +index b9aa51f8..6a28bdef 100644 +--- a/cpus.c ++++ b/cpus.c +@@ -1067,6 +1067,12 @@ void cpu_synchronize_all_pre_loadvm(void) + } + + #ifdef __aarch64__ ++static bool kvm_adjvtime_enabled(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ return cpu->kvm_adjvtime == true; ++} ++ + static void get_vcpu_timer_tick(CPUState *cs) + { + CPUARMState *env = &ARM_CPU(cs)->env; +@@ -1096,7 +1102,13 @@ static int do_vm_stop(RunState state, bool send_stop) + cpu_disable_ticks(); + pause_all_vcpus(); + #ifdef __aarch64__ +- if (first_cpu) { ++ /* vtimer adjust is used in openEuler qemu-4.0.1, however kvm_adjvtime ++ * is introduced in openEuler qemu-4.1.0. To maintain the compatibility ++ * and enable cross version migration, let's enable vtimer adjust only ++ * if kvm_adjvtime is not enabled, otherwise there may be conflicts ++ * between vtimer adjust and kvm_adjvtime. ++ */ ++ if (first_cpu && !kvm_adjvtime_enabled(first_cpu)) { + get_vcpu_timer_tick(first_cpu); + } + #endif +@@ -1946,6 +1958,7 @@ void cpu_resume(CPUState *cpu) + } + + #ifdef __aarch64__ ++ + static void set_vcpu_timer_tick(CPUState *cs) + { + CPUARMState *env = &ARM_CPU(cs)->env; +@@ -1977,7 +1990,10 @@ void resume_all_vcpus(void) + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); + #ifdef __aarch64__ +- if (first_cpu) { ++ /* Enable vtimer adjust only if kvm_adjvtime is not enabled, otherwise ++ * there may be conflicts between vtimer adjust and kvm_adjvtime.
++ */ ++ if (first_cpu && !kvm_adjvtime_enabled(first_cpu)) { + set_vcpu_timer_tick(first_cpu); + } + #endif +-- +2.23.0 + diff --git a/migration-Count-new_dirty-instead-of-real_dirty.patch b/migration-Count-new_dirty-instead-of-real_dirty.patch new file mode 100644 index 0000000000000000000000000000000000000000..a9ff297ffac9fb42ce63ef8a256e648adf1166dd --- /dev/null +++ b/migration-Count-new_dirty-instead-of-real_dirty.patch @@ -0,0 +1,74 @@ +From 63320ae36834e4ff2f0d139f205c464caa3887b4 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 22 Jun 2020 11:20:37 +0800 +Subject: [PATCH 04/11] migration: Count new_dirty instead of real_dirty + +real_dirty_pages becomes equal to total ram size after dirty log sync +in ram_init_bitmaps, the reason is that the bitmap of ramblock is +initialized to be all set, so old path counts them as "real dirty" at +beginning. + +This causes wrong dirty rate and false positive throttling. + +Signed-off-by: Keqian Zhu +Message-Id: <20200622032037.31112-1-zhukeqian1@huawei.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: BiaoXiang Ye +--- + include/exec/ram_addr.h | 5 +---- + migration/ram.c | 8 +++++--- + 2 files changed, 6 insertions(+), 7 deletions(-) + +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index b7b2e60f..52344066 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -485,8 +485,7 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, + static inline + uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, + ram_addr_t start, +- ram_addr_t length, +- uint64_t *real_dirty_pages) ++ ram_addr_t length) + { + ram_addr_t addr; + unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS); +@@ -512,7 +511,6 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, + if (src[idx][offset]) { + unsigned long bits = atomic_xchg(&src[idx][offset], 0); + unsigned long new_dirty; +- *real_dirty_pages += ctpopl(bits); + new_dirty = ~dest[k]; + dest[k] |= bits; + new_dirty &= bits; +@@ -545,7 +543,6 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, + start + addr + offset, + TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION)) { +- *real_dirty_pages += 1; + long k = (start + addr) >> TARGET_PAGE_BITS; + if (!test_and_set_bit(k, dest)) { + num_dirty++; +diff --git a/migration/ram.c b/migration/ram.c +index 840e3548..83cabec6 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1765,9 +1765,11 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, + static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb, + ram_addr_t length) + { +- rs->migration_dirty_pages += +- cpu_physical_memory_sync_dirty_bitmap(rb, 0, length, +- &rs->num_dirty_pages_period); ++ uint64_t new_dirty_pages = ++ cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length); ++ ++ rs->migration_dirty_pages += new_dirty_pages; ++ rs->num_dirty_pages_period += new_dirty_pages; + } + + /** +-- +2.27.0.dirty + diff --git a/migration-Create-migration_is_running.patch 
b/migration-Create-migration_is_running.patch new file mode 100644 index 0000000000000000000000000000000000000000..86f0e6d3db2a2a51c25ebe9d6f0f4d6c48dc2de0 --- /dev/null +++ b/migration-Create-migration_is_running.patch @@ -0,0 +1,107 @@ +From 3d75adce1b9b465c45a9e841d285b3524e19cd7d Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 14:39:46 +0800 +Subject: [PATCH] migration: Create migration_is_running() + +This function returns true if we are in the middle of a migration. +It is like migration_is_setup_or_active() with CANCELLING and COLO. +Adapt all callers that are needed. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +--- + migration/migration.c | 28 +++++++++++++++++++++++----- + migration/migration.h | 1 + + migration/savevm.c | 4 +--- + 3 files changed, 25 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 993d77b7d6..923a1d9d3f 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -822,6 +822,26 @@ bool migration_is_setup_or_active(int state) + } + } + ++bool migration_is_running(int state) ++{ ++ switch (state) { ++ case MIGRATION_STATUS_ACTIVE: ++ case MIGRATION_STATUS_POSTCOPY_ACTIVE: ++ case MIGRATION_STATUS_POSTCOPY_PAUSED: ++ case MIGRATION_STATUS_POSTCOPY_RECOVER: ++ case MIGRATION_STATUS_SETUP: ++ case MIGRATION_STATUS_PRE_SWITCHOVER: ++ case MIGRATION_STATUS_DEVICE: ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_COLO: ++ return true; ++ ++ default: ++ return false; ++ ++ } ++} ++ + static void populate_ram_info(MigrationInfo *info, MigrationState *s) + { + info->has_ram = true; +@@ -1074,7 +1094,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + MigrationCapabilityStatusList *cap; + bool cap_list[MIGRATION_CAPABILITY__MAX]; + +- if (migration_is_setup_or_active(s->state)) { ++ if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return; + } +@@ -1588,7 +1608,7 @@ static void 
migrate_fd_cancel(MigrationState *s) + + do { + old_state = s->state; +- if (!migration_is_setup_or_active(old_state)) { ++ if (!migration_is_running(old_state)) { + break; + } + /* If the migration is paused, kick it out of the pause */ +@@ -1873,9 +1893,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + return true; + } + +- if (migration_is_setup_or_active(s->state) || +- s->state == MIGRATION_STATUS_CANCELLING || +- s->state == MIGRATION_STATUS_COLO) { ++ if (migration_is_running(s->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return false; + } +diff --git a/migration/migration.h b/migration/migration.h +index e5aaf2ef70..f2bd4ebe33 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -282,6 +282,7 @@ void migrate_fd_error(MigrationState *s, const Error *error); + void migrate_fd_connect(MigrationState *s, Error *error_in); + + bool migration_is_setup_or_active(int state); ++bool migration_is_running(int state); + + void migrate_init(MigrationState *s); + bool migration_is_blocked(Error **errp); +diff --git a/migration/savevm.c b/migration/savevm.c +index 8163de7f21..f0974380e5 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1414,9 +1414,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + MigrationState *ms = migrate_get_current(); + MigrationStatus status; + +- if (migration_is_setup_or_active(ms->state) || +- ms->state == MIGRATION_STATUS_CANCELLING || +- ms->state == MIGRATION_STATUS_COLO) { ++ if (migration_is_running(ms->state)) { + error_setg(errp, QERR_MIGRATION_ACTIVE); + return -EINVAL; + } +-- +2.27.0 + diff --git a/migration-Define-VMSTATE_INSTANCE_ID_ANY.patch b/migration-Define-VMSTATE_INSTANCE_ID_ANY.patch new file mode 100644 index 0000000000000000000000000000000000000000..cd32b04997c14345aa7f488cd1a960a106d9aa15 --- /dev/null +++ b/migration-Define-VMSTATE_INSTANCE_ID_ANY.patch @@ -0,0 +1,237 @@ +From 21e049e2941b108df45c9089cbf7539caae538e6 Mon Sep 17 00:00:00 2001 
+From: Peter Xu +Date: Wed, 16 Oct 2019 10:29:30 +0800 +Subject: [PATCH 5/8] migration: Define VMSTATE_INSTANCE_ID_ANY + +Define the new macro VMSTATE_INSTANCE_ID_ANY for callers who wants to +auto-generate the vmstate instance ID. Previously it was hard coded +as -1 instead of this macro. It helps to change this default value in +the follow up patches. No functional change. + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + hw/arm/stellaris.c | 2 +- + hw/core/qdev.c | 4 +++- + hw/display/ads7846.c | 2 +- + hw/i2c/core.c | 2 +- + hw/input/stellaris_input.c | 3 ++- + hw/intc/apic_common.c | 2 +- + hw/misc/max111x.c | 3 ++- + hw/net/eepro100.c | 3 ++- + hw/pci/pci.c | 2 +- + hw/ppc/spapr.c | 2 +- + hw/timer/arm_timer.c | 2 +- + hw/tpm/tpm_emulator.c | 3 ++- + include/migration/vmstate.h | 2 ++ + migration/savevm.c | 8 ++++---- + 14 files changed, 24 insertions(+), 16 deletions(-) + +diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c +index 499035f..3432033 100644 +--- a/hw/arm/stellaris.c ++++ b/hw/arm/stellaris.c +@@ -705,7 +705,7 @@ static int stellaris_sys_init(uint32_t base, qemu_irq irq, + memory_region_init_io(&s->iomem, NULL, &ssys_ops, s, "ssys", 0x00001000); + memory_region_add_subregion(get_system_memory(), base, &s->iomem); + ssys_reset(s); +- vmstate_register(NULL, -1, &vmstate_stellaris_sys, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_stellaris_sys, s); + return 0; + } + +diff --git a/hw/core/qdev.c b/hw/core/qdev.c +index 94ebc0a..4b32f2f 100644 +--- a/hw/core/qdev.c ++++ b/hw/core/qdev.c +@@ -848,7 +848,9 @@ static void device_set_realized(Object *obj, bool value, Error **errp) + dev->canonical_path = object_get_canonical_path(OBJECT(dev)); + + if (qdev_get_vmsd(dev)) { +- if (vmstate_register_with_alias_id(dev, -1, qdev_get_vmsd(dev), dev, ++ if (vmstate_register_with_alias_id(dev, ++ VMSTATE_INSTANCE_ID_ANY, ++ qdev_get_vmsd(dev), dev, + dev->instance_id_alias, + 
dev->alias_required_for_version, + &local_err) < 0) { +diff --git a/hw/display/ads7846.c b/hw/display/ads7846.c +index 1a97e97..be1802e 100644 +--- a/hw/display/ads7846.c ++++ b/hw/display/ads7846.c +@@ -152,7 +152,7 @@ static void ads7846_realize(SSISlave *d, Error **errp) + + ads7846_int_update(s); + +- vmstate_register(NULL, -1, &vmstate_ads7846, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_ads7846, s); + } + + static void ads7846_class_init(ObjectClass *klass, void *data) +diff --git a/hw/i2c/core.c b/hw/i2c/core.c +index 20f36f1..186702b 100644 +--- a/hw/i2c/core.c ++++ b/hw/i2c/core.c +@@ -59,7 +59,7 @@ I2CBus *i2c_init_bus(DeviceState *parent, const char *name) + + bus = I2C_BUS(qbus_create(TYPE_I2C_BUS, parent, name)); + QLIST_INIT(&bus->current_devs); +- vmstate_register(NULL, -1, &vmstate_i2c_bus, bus); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_i2c_bus, bus); + return bus; + } + +diff --git a/hw/input/stellaris_input.c b/hw/input/stellaris_input.c +index 3a666d6..6c5b6d8 100644 +--- a/hw/input/stellaris_input.c ++++ b/hw/input/stellaris_input.c +@@ -86,5 +86,6 @@ void stellaris_gamepad_init(int n, qemu_irq *irq, const int *keycode) + } + s->num_buttons = n; + qemu_add_kbd_event_handler(stellaris_gamepad_put_key, s); +- vmstate_register(NULL, -1, &vmstate_stellaris_gamepad, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_stellaris_gamepad, s); + } +diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c +index e764a2b..faea1af 100644 +--- a/hw/intc/apic_common.c ++++ b/hw/intc/apic_common.c +@@ -329,7 +329,7 @@ static void apic_common_realize(DeviceState *dev, Error **errp) + } + + if (s->legacy_instance_id) { +- instance_id = -1; ++ instance_id = VMSTATE_INSTANCE_ID_ANY; + } + vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, + s, -1, 0, NULL); +diff --git a/hw/misc/max111x.c b/hw/misc/max111x.c +index d373ece..364cb01 100644 +--- a/hw/misc/max111x.c ++++ b/hw/misc/max111x.c 
+@@ -144,7 +144,8 @@ static int max111x_init(SSISlave *d, int inputs) + s->input[7] = 0x80; + s->com = 0; + +- vmstate_register(dev, -1, &vmstate_max111x, s); ++ vmstate_register(dev, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_max111x, s); + return 0; + } + +diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c +index 6607c91..03edd25 100644 +--- a/hw/net/eepro100.c ++++ b/hw/net/eepro100.c +@@ -1872,7 +1872,8 @@ static void e100_nic_realize(PCIDevice *pci_dev, Error **errp) + + s->vmstate = g_memdup(&vmstate_eepro100, sizeof(vmstate_eepro100)); + s->vmstate->name = qemu_get_queue(s->nic)->model; +- vmstate_register(&pci_dev->qdev, -1, s->vmstate, s); ++ vmstate_register(&pci_dev->qdev, VMSTATE_INSTANCE_ID_ANY, ++ s->vmstate, s); + } + + static void eepro100_instance_init(Object *obj) +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index 8076a80..e74143c 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -118,7 +118,7 @@ static void pci_bus_realize(BusState *qbus, Error **errp) + bus->machine_done.notify = pcibus_machine_done; + qemu_add_machine_init_done_notifier(&bus->machine_done); + +- vmstate_register(NULL, -1, &vmstate_pcibus, bus); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_pcibus, bus); + } + + static void pcie_bus_realize(BusState *qbus, Error **errp) +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 12ed4b0..b0f37c3 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3069,7 +3069,7 @@ static void spapr_machine_init(MachineState *machine) + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ + vmstate_register(NULL, 0, &vmstate_spapr, spapr); +- register_savevm_live(NULL, "spapr/htab", -1, 1, ++ register_savevm_live(NULL, "spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_htab_handlers, spapr); + + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), +diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c +index f0a7534..1ce4e01 100644 +--- 
a/hw/timer/arm_timer.c ++++ b/hw/timer/arm_timer.c +@@ -172,7 +172,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) + + bh = qemu_bh_new(arm_timer_tick, s); + s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); +- vmstate_register(NULL, -1, &vmstate_arm_timer, s); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_arm_timer, s); + return s; + } + +diff --git a/hw/tpm/tpm_emulator.c b/hw/tpm/tpm_emulator.c +index 38bf5fd..836c489 100644 +--- a/hw/tpm/tpm_emulator.c ++++ b/hw/tpm/tpm_emulator.c +@@ -914,7 +914,8 @@ static void tpm_emulator_inst_init(Object *obj) + tpm_emu->cur_locty_number = ~0; + qemu_mutex_init(&tpm_emu->mutex); + +- vmstate_register(NULL, -1, &vmstate_tpm_emulator, obj); ++ vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, ++ &vmstate_tpm_emulator, obj); + } + + /* +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index c2bfa7a..92f531a 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -1114,6 +1114,8 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, + + bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque); + ++#define VMSTATE_INSTANCE_ID_ANY -1 ++ + /* Returns: 0 on success, -1 on failure */ + int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, + const VMStateDescription *vmsd, +diff --git a/migration/savevm.c b/migration/savevm.c +index 480c511..62552ab 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -722,7 +722,7 @@ int register_savevm_live(DeviceState *dev, + } + pstrcat(se->idstr, sizeof(se->idstr), idstr); + +- if (instance_id == -1) { ++ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { + se->instance_id = calculate_new_instance_id(se->idstr); + } else { + se->instance_id = instance_id; +@@ -789,14 +789,14 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, + + se->compat = g_new0(CompatEntry, 1); + pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); +- 
se->compat->instance_id = instance_id == -1 ? ++ se->compat->instance_id = instance_id == VMSTATE_INSTANCE_ID_ANY ? + calculate_compat_instance_id(vmsd->name) : instance_id; +- instance_id = -1; ++ instance_id = VMSTATE_INSTANCE_ID_ANY; + } + } + pstrcat(se->idstr, sizeof(se->idstr), vmsd->name); + +- if (instance_id == -1) { ++ if (instance_id == VMSTATE_INSTANCE_ID_ANY) { + se->instance_id = calculate_new_instance_id(se->idstr); + } else { + se->instance_id = instance_id; +-- +1.8.3.1 + diff --git a/migration-Don-t-send-data-if-we-have-stopped.patch b/migration-Don-t-send-data-if-we-have-stopped.patch new file mode 100644 index 0000000000000000000000000000000000000000..08d5d3bbbdd0b8a6f7d3dbc485cc3814a830e483 --- /dev/null +++ b/migration-Don-t-send-data-if-we-have-stopped.patch @@ -0,0 +1,31 @@ +From 855404b4766ddda851035587aa1b84768abbaf11 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 22 Jan 2020 11:36:12 +0100 +Subject: [PATCH] migration: Don't send data if we have stopped + +If we do a cancel, we got out without one error, but we can't do the +rest of the output as in a normal situation. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +--- + migration/ram.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index b74929542d..dc9831d7f3 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3686,7 +3686,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- if (ret >= 0) { ++ if (ret >= 0 ++ && migration_is_setup_or_active(migrate_get_current()->state)) { + multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); +-- +2.27.0 + diff --git a/migration-Make-global-sem_sync-semaphore-by-channel.patch b/migration-Make-global-sem_sync-semaphore-by-channel.patch new file mode 100644 index 0000000000000000000000000000000000000000..d9dbab23e4f83d88595956668c61385618864fd7 --- /dev/null +++ b/migration-Make-global-sem_sync-semaphore-by-channel.patch @@ -0,0 +1,100 @@ +From 8c3794d709eefdae777477bef7ff3511d55bf418 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 14 Aug 2019 04:02:14 +0200 +Subject: [PATCH 05/10] migration: Make global sem_sync semaphore by channel + +This makes it easy to debug things because when you wait for all threads +to arrive at that semaphore, you know which one you are waiting for. + +Change-Id: I533af8cdc68f619b68eff8e4e573c4de371a3954 +Signed-off-by: Juan Quintela +Message-Id: <20190814020218.1868-3-quintela@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr.
David Alan Gilbert +--- + migration/ram.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index c75716bb..51811c2d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -661,6 +661,8 @@ typedef struct { + uint64_t num_packets; + /* pages sent through this channel */ + uint64_t num_pages; ++ /* syncs main thread and channels */ ++ QemuSemaphore sem_sync; + } MultiFDSendParams; + + typedef struct { +@@ -896,8 +898,6 @@ struct { + MultiFDSendParams *params; + /* array of pages to sent */ + MultiFDPages_t *pages; +- /* syncs main thread and channels */ +- QemuSemaphore sem_sync; + /* global number of generated multifd packets */ + uint64_t packet_num; + /* send channels ready */ +@@ -1037,6 +1037,7 @@ void multifd_save_cleanup(void) + p->c = NULL; + qemu_mutex_destroy(&p->mutex); + qemu_sem_destroy(&p->sem); ++ qemu_sem_destroy(&p->sem_sync); + g_free(p->name); + p->name = NULL; + multifd_pages_clear(p->pages); +@@ -1046,7 +1047,6 @@ void multifd_save_cleanup(void) + p->packet = NULL; + } + qemu_sem_destroy(&multifd_send_state->channels_ready); +- qemu_sem_destroy(&multifd_send_state->sem_sync); + g_free(multifd_send_state->params); + multifd_send_state->params = NULL; + multifd_pages_clear(multifd_send_state->pages); +@@ -1096,7 +1096,7 @@ static void multifd_send_sync_main(RAMState *rs) + MultiFDSendParams *p = &multifd_send_state->params[i]; + + trace_multifd_send_sync_main_wait(p->id); +- qemu_sem_wait(&multifd_send_state->sem_sync); ++ qemu_sem_wait(&p->sem_sync); + } + trace_multifd_send_sync_main(multifd_send_state->packet_num); + } +@@ -1156,7 +1156,7 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_unlock(&p->mutex); + + if (flags & MULTIFD_FLAG_SYNC) { +- qemu_sem_post(&multifd_send_state->sem_sync); ++ qemu_sem_post(&p->sem_sync); + } + qemu_sem_post(&multifd_send_state->channels_ready); + } else if (p->quit) { +@@ -1179,7 +1179,7 @@ out: + */ + if (ret != 0) { + if 
(flags & MULTIFD_FLAG_SYNC) { +- qemu_sem_post(&multifd_send_state->sem_sync); ++ qemu_sem_post(&p->sem_sync); + } + qemu_sem_post(&multifd_send_state->channels_ready); + } +@@ -1225,7 +1225,6 @@ int multifd_save_setup(void) + multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->pages = multifd_pages_init(page_count); +- qemu_sem_init(&multifd_send_state->sem_sync, 0); + qemu_sem_init(&multifd_send_state->channels_ready, 0); + + for (i = 0; i < thread_count; i++) { +@@ -1233,6 +1232,7 @@ int multifd_save_setup(void) + + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); ++ qemu_sem_init(&p->sem_sync, 0); + p->quit = false; + p->pending_job = 0; + p->id = i; +-- +2.19.1 diff --git a/migration-Make-sure-that-we-don-t-call-write-in-case.patch b/migration-Make-sure-that-we-don-t-call-write-in-case.patch new file mode 100644 index 0000000000000000000000000000000000000000..73e3fe41d0ee74e22d7e5434ca89b421bbce0708 --- /dev/null +++ b/migration-Make-sure-that-we-don-t-call-write-in-case.patch @@ -0,0 +1,94 @@ +From 2898f8669445d38d4a6a8986c1e6d94381a7e869 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Tue, 3 Mar 2020 14:51:35 +0000 +Subject: [PATCH] migration: Make sure that we don't call write() in case of + error + +RH-Author: Juan Quintela +Message-id: <20200303145143.149290-3-quintela@redhat.com> +Patchwork-id: 94113 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH v2 02/10] migration: Make sure that we don't call write() in case of error +Bugzilla: 1738451 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +If we are exiting due to an error/finish/.... Just don't try to even +touch the channel with one IO operation. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Juan Quintela +(cherry picked from commit 4d65a6216bfc44891ac298b74a6921d479805131) +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/ram.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index d4ac696899..27585a4f3e 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1195,6 +1195,12 @@ struct { + uint64_t packet_num; + /* send channels ready */ + QemuSemaphore channels_ready; ++ /* ++ * Have we already run terminate threads. There is a race when it ++ * happens that we got one error while we are exiting. ++ * We will use atomic operations. Only valid values are 0 and 1. ++ */ ++ int exiting; + } *multifd_send_state; + + /* +@@ -1223,6 +1229,10 @@ static int multifd_send_pages(RAMState *rs) + MultiFDPages_t *pages = multifd_send_state->pages; + uint64_t transferred; + ++ if (atomic_read(&multifd_send_state->exiting)) { ++ return -1; ++ } ++ + qemu_sem_wait(&multifd_send_state->channels_ready); + /* + * next_channel can remain from a previous migration that was +@@ -1308,6 +1318,16 @@ static void multifd_send_terminate_threads(Error *err) + } + } + ++ /* ++ * We don't want to exit each threads twice. Depending on where ++ * we get the error, or if there are two independent errors in two ++ * threads at the same time, we can end calling this function ++ * twice. 
++ */ ++ if (atomic_xchg(&multifd_send_state->exiting, 1)) { ++ return; ++ } ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -1425,6 +1445,10 @@ static void *multifd_send_thread(void *opaque) + + while (true) { + qemu_sem_wait(&p->sem); ++ ++ if (atomic_read(&multifd_send_state->exiting)) { ++ break; ++ } + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +@@ -1655,6 +1679,7 @@ int multifd_save_setup(void) + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->pages = multifd_pages_init(page_count); + qemu_sem_init(&multifd_send_state->channels_ready, 0); ++ atomic_set(&multifd_send_state->exiting, 0); + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +-- +2.27.0 + diff --git a/migration-Maybe-VM-is-paused-when-migration-is-cance.patch b/migration-Maybe-VM-is-paused-when-migration-is-cance.patch new file mode 100644 index 0000000000000000000000000000000000000000..6c918f3c17ca619dfb88e1856d3d26625419f465 --- /dev/null +++ b/migration-Maybe-VM-is-paused-when-migration-is-cance.patch @@ -0,0 +1,57 @@ +From 5e99e1329fa52dce8ab784a960e64a3e19b429aa Mon Sep 17 00:00:00 2001 +From: Zhimin Feng +Date: Tue, 14 Jan 2020 17:43:09 +0800 +Subject: [PATCH 07/10] migration: Maybe VM is paused when migration is + cancelled + +If the migration is cancelled when it is in the completion phase, +the migration state is set to MIGRATION_STATUS_CANCELLING. +The VM maybe wait for the 'pause_sem' semaphore in migration_maybe_pause +function, so that VM always is paused. 
+ +Change-Id: Ib2f2f42ee1edbb14da269ee19ba1fe16dd363822 +Reported-by: Euler Robot +Signed-off-by: Zhimin Feng +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + migration/migration.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index bea9b1d7..114c33a1 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2731,14 +2731,22 @@ static int migration_maybe_pause(MigrationState *s, + /* This block intentionally left blank */ + } + +- qemu_mutex_unlock_iothread(); +- migrate_set_state(&s->state, *current_active_state, +- MIGRATION_STATUS_PRE_SWITCHOVER); +- qemu_sem_wait(&s->pause_sem); +- migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, +- new_state); +- *current_active_state = new_state; +- qemu_mutex_lock_iothread(); ++ /* ++ * If the migration is cancelled when it is in the completion phase, ++ * the migration state is set to MIGRATION_STATUS_CANCELLING. ++ * So we don't need to wait a semaphore, otherwise we would always ++ * wait for the 'pause_sem' semaphore. ++ */ ++ if (s->state != MIGRATION_STATUS_CANCELLING) { ++ qemu_mutex_unlock_iothread(); ++ migrate_set_state(&s->state, *current_active_state, ++ MIGRATION_STATUS_PRE_SWITCHOVER); ++ qemu_sem_wait(&s->pause_sem); ++ migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, ++ new_state); ++ *current_active_state = new_state; ++ qemu_mutex_lock_iothread(); ++ } + + return s->state == new_state ? 
0 : -EINVAL; + } +-- +2.19.1 diff --git a/migration-Rate-limit-inside-host-pages.patch b/migration-Rate-limit-inside-host-pages.patch new file mode 100644 index 0000000000000000000000000000000000000000..17eb46f82ce18a6e2e17583ea6a77879178bc9bc --- /dev/null +++ b/migration-Rate-limit-inside-host-pages.patch @@ -0,0 +1,173 @@ +From 3e8a587b055f0e3cabf91921fca0777fe7e349f5 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 17 Mar 2020 17:05:18 +0000 +Subject: [PATCH] migration: Rate limit inside host pages + +RH-Author: Laurent Vivier +Message-id: <20200317170518.9303-1-lvivier@redhat.com> +Patchwork-id: 94374 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH] migration: Rate limit inside host pages +Bugzilla: 1814336 +RH-Acked-by: Peter Xu +RH-Acked-by: Juan Quintela +RH-Acked-by: Dr. David Alan Gilbert + +From: "Dr. David Alan Gilbert" + +When using hugepages, rate limiting is necessary within each huge +page, since a 1G huge page can take a significant time to send, so +you end up with bursty behaviour. + +Fixes: 4c011c37ecb3 ("postcopy: Send whole huge pages") +Reported-by: Lin Ma +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit 97e1e06780e70f6e98a0d2df881e0c0927d3aeb6) +Signed-off-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=1814336 +BRANCH: rhel-av-8.2.0 +UPSTREAM: Merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=27283241 +TESTED: Tested that the migration abort doesn't trigger an error message in + the kernel logs on P9 + +Signed-off-by: Danilo C. L. 
de Paula +--- + migration/migration.c | 57 ++++++++++++++++++++++++------------------ + migration/migration.h | 1 + + migration/ram.c | 2 ++ + migration/trace-events | 4 +-- + 4 files changed, 37 insertions(+), 27 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index fd7d81d4b6..b0b9430822 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3260,6 +3260,37 @@ void migration_consume_urgent_request(void) + qemu_sem_wait(&migrate_get_current()->rate_limit_sem); + } + ++/* Returns true if the rate limiting was broken by an urgent request */ ++bool migration_rate_limit(void) ++{ ++ int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ MigrationState *s = migrate_get_current(); ++ ++ bool urgent = false; ++ migration_update_counters(s, now); ++ if (qemu_file_rate_limit(s->to_dst_file)) { ++ /* ++ * Wait for a delay to do rate limiting OR ++ * something urgent to post the semaphore. ++ */ ++ int ms = s->iteration_start_time + BUFFER_DELAY - now; ++ trace_migration_rate_limit_pre(ms); ++ if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { ++ /* ++ * We were woken by one or more urgent things but ++ * the timedwait will have consumed one of them. ++ * The service routine for the urgent wake will dec ++ * the semaphore itself for each item it consumes, ++ * so add this one we just eat back. ++ */ ++ qemu_sem_post(&s->rate_limit_sem); ++ urgent = true; ++ } ++ trace_migration_rate_limit_post(urgent); ++ } ++ return urgent; ++} ++ + /* + * Master migration thread on the source VM. + * It drives the migration and pumps the data down the outgoing channel. 
+@@ -3313,8 +3344,6 @@ static void *migration_thread(void *opaque) + trace_migration_thread_setup_complete(); + + while (migration_is_active(s)) { +- int64_t current_time; +- + if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { + MigIterateState iter_state = migration_iteration_run(s); + if (iter_state == MIG_ITERATE_SKIP) { +@@ -3341,29 +3370,7 @@ static void *migration_thread(void *opaque) + update_iteration_initial_status(s); + } + +- current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +- +- migration_update_counters(s, current_time); +- +- urgent = false; +- if (qemu_file_rate_limit(s->to_dst_file)) { +- /* Wait for a delay to do rate limiting OR +- * something urgent to post the semaphore. +- */ +- int ms = s->iteration_start_time + BUFFER_DELAY - current_time; +- trace_migration_thread_ratelimit_pre(ms); +- if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) { +- /* We were worken by one or more urgent things but +- * the timedwait will have consumed one of them. +- * The service routine for the urgent wake will dec +- * the semaphore itself for each item it consumes, +- * so add this one we just eat back. 
+- */ +- qemu_sem_post(&s->rate_limit_sem); +- urgent = true; +- } +- trace_migration_thread_ratelimit_post(urgent); +- } ++ urgent = migration_rate_limit(); + } + + trace_migration_thread_after_loop(); +diff --git a/migration/migration.h b/migration/migration.h +index 4aa72297fc..ff8a0bf12d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -345,6 +345,7 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); + + void migration_make_urgent_request(void); + void migration_consume_urgent_request(void); ++bool migration_rate_limit(void); + + int migration_send_initial_packet(QIOChannel *c, uint8_t id, Error **errp); + int migration_recv_initial_packet(QIOChannel *c, Error **errp); +diff --git a/migration/ram.c b/migration/ram.c +index 27585a4f3e..d6657a8093 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3076,6 +3076,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + } + + pss->page++; ++ /* Allow rate limiting to happen in the middle of huge pages */ ++ migration_rate_limit(); + } while ((pss->page & (pagesize_bits - 1)) && + offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); + +diff --git a/migration/trace-events b/migration/trace-events +index c0640cd424..b4d85229d9 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -131,12 +131,12 @@ migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi6 + migration_completion_file_err(void) "" + migration_completion_postcopy_end(void) "" + migration_completion_postcopy_end_after_complete(void) "" ++migration_rate_limit_pre(int ms) "%d ms" ++migration_rate_limit_post(int urgent) "urgent: %d" + migration_return_path_end_before(void) "" + migration_return_path_end_after(int rp_error) "%d" + migration_thread_after_loop(void) "" + migration_thread_file_err(void) "" +-migration_thread_ratelimit_pre(int ms) "%d ms" +-migration_thread_ratelimit_post(int urgent) "urgent: %d" + migration_thread_setup_complete(void) 
"" + open_return_path_on_source(void) "" + open_return_path_on_source_continue(void) "" +-- +2.27.0 + diff --git a/migration-Refactoring-multi-thread-compress-migratio.patch b/migration-Refactoring-multi-thread-compress-migratio.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3ab4d0d97a8a1cf1c945a38b97b47a33277ab9b --- /dev/null +++ b/migration-Refactoring-multi-thread-compress-migratio.patch @@ -0,0 +1,305 @@ +From 524d8cee48006918cf181f2817e4ec3ce5a3bb12 Mon Sep 17 00:00:00 2001 +From: Zeyu Jin +Date: Sat, 30 Jan 2021 15:21:17 +0800 +Subject: [PATCH] migration: Refactoring multi-thread compress migration + +Refactor the code of the compression procedure, which includes: + +1. Move qemu_compress_data and qemu_put_compression_data from qemu-file.c to +ram.c, because most of the code logic has nothing to do +with qemu-file. Besides, the decompression code is located only in ram.c. + +2. Simplify the function input arguments for compression and decompression. +Wrap the input into the param structure which already exists. This change also +makes the functions much more flexible for other compression methods.
+ +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/qemu-file.c | 78 ++++++--------------------------------- + migration/qemu-file.h | 4 +- + migration/ram.c | 85 +++++++++++++++++++++++++++++++------------ + 3 files changed, 75 insertions(+), 92 deletions(-) + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index be0d6c8ca8..3bba694ed4 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -695,72 +695,6 @@ uint64_t qemu_get_be64(QEMUFile *f) + return v; + } + +-/* return the size after compression, or negative value on error */ +-static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len, +- const uint8_t *source, size_t source_len) +-{ +- int err; +- +- err = deflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = source_len; +- stream->next_in = (uint8_t *)source; +- stream->avail_out = dest_len; +- stream->next_out = dest; +- +- err = deflate(stream, Z_FINISH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- return stream->next_out - dest; +-} +- +-/* Compress size bytes of data start at p and store the compressed +- * data to the buffer of f. +- * +- * When f is not writable, return -1 if f has no space to save the +- * compressed data. +- * When f is wirtable and it has no space to save the compressed data, +- * do fflush first, if f still has no space to save the compressed +- * data, return -1. 
+- */ +-ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, +- const uint8_t *p, size_t size) +-{ +- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t); +- +- if (blen < compressBound(size)) { +- if (!qemu_file_is_writable(f)) { +- return -1; +- } +- qemu_fflush(f); +- blen = IO_BUF_SIZE - sizeof(int32_t); +- if (blen < compressBound(size)) { +- return -1; +- } +- } +- +- blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t), +- blen, p, size); +- if (blen < 0) { +- return -1; +- } +- +- qemu_put_be32(f, blen); +- if (f->ops->writev_buffer) { +- add_to_iovec(f, f->buf + f->buf_index, blen, false); +- } +- f->buf_index += blen; +- if (f->buf_index == IO_BUF_SIZE) { +- qemu_fflush(f); +- } +- return blen + sizeof(int32_t); +-} +- + /* Put the data in the buffer of f_src to the buffer of f_des, and + * then reset the buf_index of f_src to 0. + */ +@@ -820,3 +754,15 @@ void qemu_file_set_blocking(QEMUFile *f, bool block) + f->ops->set_blocking(f->opaque, block); + } + } ++ ++ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr) ++{ ++ *dest_ptr = f->buf + f->buf_index + sizeof(int32_t); ++ return IO_BUF_SIZE - f->buf_index - sizeof(int32_t); ++} ++ ++void qemu_put_compress_end(QEMUFile *f, unsigned int v) ++{ ++ qemu_put_be32(f, v); ++ add_buf_to_iovec(f, v); ++} +diff --git a/migration/qemu-file.h b/migration/qemu-file.h +index 5de9fa2e96..6570e53e13 100644 +--- a/migration/qemu-file.h ++++ b/migration/qemu-file.h +@@ -134,8 +134,6 @@ bool qemu_file_is_writable(QEMUFile *f); + + size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); + size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); +-ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, +- const uint8_t *p, size_t size); + int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); + + /* +@@ -162,6 +160,8 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t flags); + void 
ram_control_after_iterate(QEMUFile *f, uint64_t flags); + void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); + ++ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr); ++void qemu_put_compress_end(QEMUFile *f, unsigned int v); + /* Whenever this is found in the data stream, the flags + * will be passed to ram_control_load_hook in the incoming-migration + * side. This lets before_ram_iterate/after_ram_iterate add +diff --git a/migration/ram.c b/migration/ram.c +index 92ce1a53e7..f78a681ca2 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -449,26 +449,22 @@ static QemuThread *decompress_threads; + static QemuMutex decomp_done_lock; + static QemuCond decomp_done_cond; + +-static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, +- ram_addr_t offset, uint8_t *source_buf); ++static bool do_compress_ram_page(CompressParam *param, RAMBlock *block); + + static void *do_data_compress(void *opaque) + { + CompressParam *param = opaque; + RAMBlock *block; +- ram_addr_t offset; + bool zero_page; + + qemu_mutex_lock(¶m->mutex); + while (!param->quit) { + if (param->block) { + block = param->block; +- offset = param->offset; + param->block = NULL; + qemu_mutex_unlock(¶m->mutex); + +- zero_page = do_compress_ram_page(param->file, ¶m->stream, +- block, offset, param->originbuf); ++ zero_page = do_compress_ram_page(param, block); + + qemu_mutex_lock(&comp_done_lock); + param->done = true; +@@ -2212,28 +2208,73 @@ static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, + return 1; + } + +-static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, +- ram_addr_t offset, uint8_t *source_buf) ++/* ++ * Compress size bytes of data start at p and store the compressed ++ * data to the buffer of f. ++ * ++ * Since the file is dummy file with empty_ops, return -1 if f has no space to ++ * save the compressed data. 
++ */ ++static ssize_t qemu_put_compression_data(CompressParam *param, size_t size) ++{ ++ int err; ++ uint8_t *dest = NULL; ++ z_stream *stream = ¶m->stream; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ ++ if (blen < compressBound(size)) { ++ return -1; ++ } ++ ++ err = deflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = size; ++ stream->next_in = p; ++ stream->avail_out = blen; ++ stream->next_out = dest; ++ ++ err = deflate(stream, Z_FINISH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ blen = stream->next_out - dest; ++ if (blen < 0) { ++ return -1; ++ } ++ ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static bool do_compress_ram_page(CompressParam *param, RAMBlock *block) + { + RAMState *rs = ram_state; ++ ram_addr_t offset = param->offset; + uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); + bool zero_page = false; + int ret; + +- if (save_zero_page_to_file(rs, f, block, offset)) { ++ if (save_zero_page_to_file(rs, param->file, block, offset)) { + zero_page = true; + goto exit; + } + +- save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); ++ save_page_header(rs, param->file, block, ++ offset | RAM_SAVE_FLAG_COMPRESS_PAGE); + + /* + * copy it to a internal buffer to avoid it being modified by VM + * so that we can catch up the error during compression and + * decompression + */ +- memcpy(source_buf, p, TARGET_PAGE_SIZE); +- ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE); ++ memcpy(param->originbuf, p, TARGET_PAGE_SIZE); ++ ret = qemu_put_compression_data(param, TARGET_PAGE_SIZE); + if (ret < 0) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + error_report("compressed data failed!"); +@@ -3926,19 +3967,20 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) + + /* return the size after decompression, or negative value on 
error */ + static int +-qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, +- const uint8_t *source, size_t source_len) ++qemu_uncompress_data(DecompressParam *param, uint8_t *dest, size_t pagesize) + { + int err; + ++ z_stream *stream = ¶m->stream; ++ + err = inflateReset(stream); + if (err != Z_OK) { + return -1; + } + +- stream->avail_in = source_len; +- stream->next_in = (uint8_t *)source; +- stream->avail_out = dest_len; ++ stream->avail_in = param->len; ++ stream->next_in = param->compbuf; ++ stream->avail_out = pagesize; + stream->next_out = dest; + + err = inflate(stream, Z_NO_FLUSH); +@@ -3952,22 +3994,17 @@ qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, + static void *do_data_decompress(void *opaque) + { + DecompressParam *param = opaque; +- unsigned long pagesize; + uint8_t *des; +- int len, ret; ++ int ret; + + qemu_mutex_lock(¶m->mutex); + while (!param->quit) { + if (param->des) { + des = param->des; +- len = param->len; + param->des = 0; + qemu_mutex_unlock(¶m->mutex); + +- pagesize = TARGET_PAGE_SIZE; +- +- ret = qemu_uncompress_data(¶m->stream, des, pagesize, +- param->compbuf, len); ++ ret = qemu_uncompress_data(param, des, TARGET_PAGE_SIZE); + if (ret < 0 && migrate_get_current()->decompress_error_check) { + error_report("decompress data failed"); + qemu_file_set_error(decomp_file, ret); +-- +2.27.0 + diff --git a/migration-add-qemu_file_update_transfer-interface.patch b/migration-add-qemu_file_update_transfer-interface.patch new file mode 100644 index 0000000000000000000000000000000000000000..4222fd0adb202051cd57a3f0cab01e5ad52f8248 --- /dev/null +++ b/migration-add-qemu_file_update_transfer-interface.patch @@ -0,0 +1,50 @@ +From 7572495245a437da717e6829a9ce852cc3f229c9 Mon Sep 17 00:00:00 2001 +From: Zheng Chuan +Date: Mon, 20 Apr 2020 15:13:47 +0800 +Subject: [PATCH 02/10] migration: add qemu_file_update_transfer interface + +Add qemu_file_update_transfer for just update bytes_xfer for speed 
+limitation. This will be used for further migration feature such as +multifd migration. + +Change-Id: I969aa15305c961254b6fb9805b0ed2d65826cc5d +Signed-off-by: Ivan Ren +Reviewed-by: Wei Yang +Reviewed-by: Juan Quintela +Message-Id: <1564464816-21804-2-git-send-email-ivanren@tencent.com> +Signed-off-by: Dr. David Alan Gilbert +--- + migration/qemu-file.c | 5 +++++ + migration/qemu-file.h | 1 + + 2 files changed, 6 insertions(+) + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 04315855..18f48052 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -615,6 +615,11 @@ void qemu_file_reset_rate_limit(QEMUFile *f) + f->bytes_xfer = 0; + } + ++void qemu_file_update_transfer(QEMUFile *f, int64_t len) ++{ ++ f->bytes_xfer += len; ++} ++ + void qemu_put_be16(QEMUFile *f, unsigned int v) + { + qemu_put_byte(f, v >> 8); +diff --git a/migration/qemu-file.h b/migration/qemu-file.h +index 13baf896..5de9fa2e 100644 +--- a/migration/qemu-file.h ++++ b/migration/qemu-file.h +@@ -147,6 +147,7 @@ int qemu_peek_byte(QEMUFile *f, int offset); + void qemu_file_skip(QEMUFile *f, int size); + void qemu_update_position(QEMUFile *f, size_t size); + void qemu_file_reset_rate_limit(QEMUFile *f); ++void qemu_file_update_transfer(QEMUFile *f, int64_t len); + void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); + int64_t qemu_file_get_rate_limit(QEMUFile *f); + void qemu_file_set_error(QEMUFile *f, int ret); +-- +2.19.1 diff --git a/migration-add-speed-limit-for-multifd-migration.patch b/migration-add-speed-limit-for-multifd-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..690d9c9cf095cafdb2ff18025d70ee57a2527de2 --- /dev/null +++ b/migration-add-speed-limit-for-multifd-migration.patch @@ -0,0 +1,127 @@ +From bc5780480db9e38699df0b4697e60a9f36258dc4 Mon Sep 17 00:00:00 2001 +From: Ivan Ren +Date: Tue, 30 Jul 2019 13:33:35 +0800 +Subject: [PATCH 03/10] migration: add speed limit for multifd migration + +Limit the 
speed of multifd migration through common speed limitation +qemu file. + +Change-Id: Id2abfc7ea85679bd53130a43043cc70179a52e87 +Signed-off-by: Ivan Ren +Message-Id: <1564464816-21804-3-git-send-email-ivanren@tencent.com> +Reviewed-by: Wei Yang +Reviewed-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert +--- + migration/ram.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 889148dd..88ddd2bb 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -922,7 +922,7 @@ struct { + * false. + */ + +-static int multifd_send_pages(void) ++static int multifd_send_pages(RAMState *rs) + { + int i; + static int next_channel; +@@ -954,6 +954,7 @@ static int multifd_send_pages(void) + multifd_send_state->pages = p->pages; + p->pages = pages; + transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; ++ qemu_file_update_transfer(rs->f, transferred); + ram_counters.multifd_bytes += transferred; + ram_counters.transferred += transferred;; + qemu_mutex_unlock(&p->mutex); +@@ -962,7 +963,7 @@ static int multifd_send_pages(void) + return 1; + } + +-static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) ++static int multifd_queue_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) + { + MultiFDPages_t *pages = multifd_send_state->pages; + +@@ -981,12 +982,12 @@ static int multifd_queue_page(RAMBlock *block, ram_addr_t offset) + } + } + +- if (multifd_send_pages() < 0) { ++ if (multifd_send_pages(rs) < 0) { + return -1; + } + + if (pages->block != block) { +- return multifd_queue_page(block, offset); ++ return multifd_queue_page(rs, block, offset); + } + + return 1; +@@ -1054,7 +1055,7 @@ void multifd_save_cleanup(void) + multifd_send_state = NULL; + } + +-static void multifd_send_sync_main(void) ++static void multifd_send_sync_main(RAMState *rs) + { + int i; + +@@ -1062,7 +1063,7 @@ static void multifd_send_sync_main(void) + return; + } + if 
(multifd_send_state->pages->used) { +- if (multifd_send_pages() < 0) { ++ if (multifd_send_pages(rs) < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return; + } +@@ -1083,6 +1084,7 @@ static void multifd_send_sync_main(void) + p->packet_num = multifd_send_state->packet_num++; + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; ++ qemu_file_update_transfer(rs->f, p->packet_len); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +@@ -2079,7 +2081,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) + static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, + ram_addr_t offset) + { +- if (multifd_queue_page(block, offset) < 0) { ++ if (multifd_queue_page(rs, block, offset) < 0) { + return -1; + } + ram_counters.normal++; +@@ -3482,7 +3484,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + +- multifd_send_sync_main(); ++ multifd_send_sync_main(*rsp); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3570,7 +3572,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- multifd_send_sync_main(); ++ multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + ram_counters.transferred += 8; +@@ -3629,7 +3631,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + + rcu_read_unlock(); + +- multifd_send_sync_main(); ++ multifd_send_sync_main(rs); + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +-- +2.19.1 diff --git a/migration-always-initialise-ram_counters-for-a-new-m.patch b/migration-always-initialise-ram_counters-for-a-new-m.patch new file mode 100644 index 0000000000000000000000000000000000000000..ccd0db9ab87204ca41e17c1120618748eb4383fe --- /dev/null +++ b/migration-always-initialise-ram_counters-for-a-new-m.patch @@ -0,0 +1,125 @@ +From 
af2aa4f553565ae6b2248204c154748f38ec4746 Mon Sep 17 00:00:00 2001 +From: Ivan Ren +Date: Fri, 2 Aug 2019 18:18:41 +0800 +Subject: [PATCH 01/10] migration: always initialise ram_counters for a new + migration + +This patch fixes a multifd migration bug in the migration speed calculation. The +problem can be reproduced as follows: +1. start a VM and apply heavy memory write stress to prevent the VM from being + successfully migrated to the destination +2. begin a migration with multifd +3. migrate for a long time [actually, this can be measured by transferred bytes] +4. migrate cancel +5. begin a new migration with multifd; the migration will directly run into the + migration_completion phase + +The reason is as follows: + +Migration updates the bandwidth and s->threshold_size in function +migration_update_counters after BUFFER_DELAY time: + + current_bytes = migration_total_bytes(s); + transferred = current_bytes - s->iteration_initial_bytes; + time_spent = current_time - s->iteration_start_time; + bandwidth = (double)transferred / time_spent; + s->threshold_size = bandwidth * s->parameters.downtime_limit; + +In multifd migration, the migration_total_bytes function returns +qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes. +s->iteration_initial_bytes will be initialized to 0 at every new migration, +but ram_counters is a global variable, so data from previous migrations is +accumulated. So if ram_counters.multifd_bytes is big enough, it may cause +pending_size >= s->threshold_size to become false in migration_iteration_run +after the first migration_update_counters. + +Change-Id: Ib153d8676a5b82650bfb1156060e09f0d29f3ac6 +Signed-off-by: Ivan Ren +Reviewed-by: Juan Quintela +Reviewed-by: Wei Yang +Suggested-by: Wei Yang +Message-Id: <1564741121-1840-1-git-send-email-ivanren@tencent.com> +Signed-off-by: Dr.
David Alan Gilbert +--- + migration/migration.c | 25 +++++++++++++++++++------ + migration/savevm.c | 1 + + 2 files changed, 20 insertions(+), 6 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8a607fe1..bea9b1d7 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1908,6 +1908,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + } + + migrate_init(s); ++ /* ++ * set ram_counters memory to zero for a ++ * new migration ++ */ ++ memset(&ram_counters, 0, sizeof(ram_counters)); + + return true; + } +@@ -3025,6 +3030,17 @@ static void migration_calculate_complete(MigrationState *s) + } + } + ++static void update_iteration_initial_status(MigrationState *s) ++{ ++ /* ++ * Update these three fields at the same time to avoid mismatch info lead ++ * wrong speed calculation. ++ */ ++ s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ s->iteration_initial_bytes = migration_total_bytes(s); ++ s->iteration_initial_pages = ram_get_total_transferred_pages(); ++} ++ + static void migration_update_counters(MigrationState *s, + int64_t current_time) + { +@@ -3060,9 +3076,7 @@ static void migration_update_counters(MigrationState *s, + + qemu_file_reset_rate_limit(s->to_dst_file); + +- s->iteration_start_time = current_time; +- s->iteration_initial_bytes = current_bytes; +- s->iteration_initial_pages = ram_get_total_transferred_pages(); ++ update_iteration_initial_status(s); + + trace_migrate_transferred(transferred, time_spent, + bandwidth, s->threshold_size); +@@ -3186,7 +3200,7 @@ static void *migration_thread(void *opaque) + rcu_register_thread(); + + object_ref(OBJECT(s)); +- s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ update_iteration_initial_status(s); + + qemu_savevm_state_header(s->to_dst_file); + +@@ -3251,8 +3265,7 @@ static void *migration_thread(void *opaque) + * the local variables. 
This is important to avoid + * breaking transferred_bytes and bandwidth calculation + */ +- s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +- s->iteration_initial_bytes = 0; ++ update_iteration_initial_status(s); + } + + current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +diff --git a/migration/savevm.c b/migration/savevm.c +index 79ed44d4..480c511b 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1424,6 +1424,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + } + + migrate_init(ms); ++ memset(&ram_counters, 0, sizeof(ram_counters)); + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +-- +2.19.1 diff --git a/migration-colo-fix-use-after-free-of-local_err.patch b/migration-colo-fix-use-after-free-of-local_err.patch new file mode 100644 index 0000000000000000000000000000000000000000..c03ceb5120bc3069ac123cc9c2702653c4d2da17 --- /dev/null +++ b/migration-colo-fix-use-after-free-of-local_err.patch @@ -0,0 +1,33 @@ +From 663e9b5f25d22834260a0686f77a27c957cd7b2f Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 24 Mar 2020 18:36:28 +0300 +Subject: [PATCH 07/14] migration/colo: fix use after free of local_err + +local_err is used again in secondary_vm_do_failover() after +replication_stop_all(), so we must zero it. Otherwise try to set +non-NULL local_err will crash. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200324153630.11882-5-vsementsov@virtuozzo.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Peng Liang +--- + migration/colo.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/colo.c b/migration/colo.c +index 9f84b1fa3c0f..761b3544d472 100644 +--- a/migration/colo.c ++++ b/migration/colo.c +@@ -89,6 +89,7 @@ static void secondary_vm_do_failover(void) + replication_stop_all(true, &local_err); + if (local_err) { + error_report_err(local_err); ++ local_err = NULL; + } + + /* Notify all filters of all NIC to do checkpoint */ +-- +2.26.2 + diff --git a/migration-dirtyrate-Add-RamblockDirtyInfo-to-store-s.patch b/migration-dirtyrate-Add-RamblockDirtyInfo-to-store-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..8028a29dd7141ce2fc2e139559c1a54661b31109 --- /dev/null +++ b/migration-dirtyrate-Add-RamblockDirtyInfo-to-store-s.patch @@ -0,0 +1,54 @@ +From 17b0582ebba622afbd8f454bbee8141ed2785f13 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:21:58 +0800 +Subject: [PATCH] migration/dirtyrate: Add RamblockDirtyInfo to store sampled + page info + +Add RamblockDirtyInfo to store sampled page info of each ramblock. + +Signed-off-by: Chuan Zheng +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: David Edmondson +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-4-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. David Alan Gilbert +--- + migration/dirtyrate.h | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h +index 84ab9409ac..8707df852d 100644 +--- a/migration/dirtyrate.h ++++ b/migration/dirtyrate.h +@@ -19,10 +19,28 @@ + */ + #define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512 + ++/* ++ * Record ramblock idstr ++ */ ++#define RAMBLOCK_INFO_MAX_LEN 256 ++ + struct DirtyRateConfig { + uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ + int64_t sample_period_seconds; /* time duration between two sampling */ + }; + ++/* ++ * Store dirtypage info for each ramblock. 
++ */ ++struct RamblockDirtyInfo { ++ char idstr[RAMBLOCK_INFO_MAX_LEN]; /* idstr for each ramblock */ ++ uint8_t *ramblock_addr; /* base address of ramblock we measure */ ++ uint64_t ramblock_pages; /* ramblock size in TARGET_PAGE_SIZE */ ++ uint64_t *sample_page_vfn; /* relative offset address for sampled page */ ++ uint64_t sample_pages_count; /* count of sampled pages */ ++ uint64_t sample_dirty_count; /* count of dirty pages we measure */ ++ uint32_t *hash_result; /* array of hash result for sampled pages */ ++}; ++ + void *get_dirtyrate_thread(void *arg); + #endif +-- +2.27.0 + diff --git a/migration-dirtyrate-Add-dirtyrate-statistics-series-.patch b/migration-dirtyrate-Add-dirtyrate-statistics-series-.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d56f8029ca9b35243e87984538c93113cd4513a --- /dev/null +++ b/migration-dirtyrate-Add-dirtyrate-statistics-series-.patch @@ -0,0 +1,93 @@ +From d1340703e127c02e9a586143039507ba10d73cfb Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:21:59 +0800 +Subject: [PATCH] migration/dirtyrate: Add dirtyrate statistics series + functions + +Add dirtyrate statistics functions to record/update dirtyrate info. + +Signed-off-by: Chuan Zheng +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-5-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/dirtyrate.c | 32 ++++++++++++++++++++++++++++++++ + migration/dirtyrate.h | 12 ++++++++++++ + 2 files changed, 44 insertions(+) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 44a60bf10d..cbb323d6ec 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -23,6 +23,7 @@ + #include "dirtyrate.h" + + static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; ++static struct DirtyRateStat DirtyStat; + + static int dirtyrate_set_state(int *state, int old_state, int new_state) + { +@@ -34,6 +35,37 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) + } + } + ++static void reset_dirtyrate_stat(void) ++{ ++ DirtyStat.total_dirty_samples = 0; ++ DirtyStat.total_sample_count = 0; ++ DirtyStat.total_block_mem_MB = 0; ++ DirtyStat.dirty_rate = -1; ++ DirtyStat.start_time = 0; ++ DirtyStat.calc_time = 0; ++} ++ ++static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) ++{ ++ DirtyStat.total_dirty_samples += info->sample_dirty_count; ++ DirtyStat.total_sample_count += info->sample_pages_count; ++ /* size of total pages in MB */ ++ DirtyStat.total_block_mem_MB += (info->ramblock_pages * ++ TARGET_PAGE_SIZE) >> 20; ++} ++ ++static void update_dirtyrate(uint64_t msec) ++{ ++ uint64_t dirtyrate; ++ uint64_t total_dirty_samples = DirtyStat.total_dirty_samples; ++ uint64_t total_sample_count = DirtyStat.total_sample_count; ++ uint64_t total_block_mem_MB = DirtyStat.total_block_mem_MB; ++ ++ dirtyrate = total_dirty_samples * total_block_mem_MB * ++ 1000 / (total_sample_count * msec); ++ ++ DirtyStat.dirty_rate = dirtyrate; ++} + + static void calculate_dirtyrate(struct DirtyRateConfig config) + { +diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h +index 8707df852d..312debca6f 100644 +--- a/migration/dirtyrate.h ++++ b/migration/dirtyrate.h +@@ -42,5 +42,17 @@ struct RamblockDirtyInfo { + uint32_t *hash_result; /* array of hash result for sampled pages */ + }; + ++/* ++ * 
Store calculation statistics for each measure. ++ */ ++struct DirtyRateStat { ++ uint64_t total_dirty_samples; /* total dirty sampled page */ ++ uint64_t total_sample_count; /* total sampled pages */ ++ uint64_t total_block_mem_MB; /* size of total sampled pages in MB */ ++ int64_t dirty_rate; /* dirty rate in MB/s */ ++ int64_t start_time; /* calculation start time in units of second */ ++ int64_t calc_time; /* time duration of two sampling in units of second */ ++}; ++ + void *get_dirtyrate_thread(void *arg); + #endif +-- +2.27.0 + diff --git a/migration-dirtyrate-Add-trace_calls-to-make-it-easie.patch b/migration-dirtyrate-Add-trace_calls-to-make-it-easie.patch new file mode 100644 index 0000000000000000000000000000000000000000..79d825c8a34d1229876b03e1ca64e464d7e0d91c --- /dev/null +++ b/migration-dirtyrate-Add-trace_calls-to-make-it-easie.patch @@ -0,0 +1,99 @@ +From 8a36332d38c0c0ba6b7d8c096367a4ec7c94e522 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:22:07 +0800 +Subject: [PATCH] migration/dirtyrate: Add trace_calls to make it easier to + debug + +Add trace_calls to make it easier to debug + +Signed-off-by: Chuan Zheng +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: David Edmondson +Message-Id: <1600237327-33618-13-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/dirtyrate.c | 9 +++++++++ + migration/trace-events | 8 ++++++++ + 2 files changed, 17 insertions(+) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 9d9155f8ab..80936a4ca6 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -22,6 +22,7 @@ + #include "qapi/qapi-commands-migration.h" + #include "migration.h" + #include "ram.h" ++#include "trace.h" + #include "dirtyrate.h" + + static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; +@@ -54,6 +55,7 @@ static bool is_sample_period_valid(int64_t sec) + static int dirtyrate_set_state(int *state, int old_state, int new_state) + { + assert(new_state < DIRTY_RATE_STATUS__MAX); ++ trace_dirtyrate_set_state(DirtyRateStatus_str(new_state)); + if (atomic_cmpxchg(state, old_state, new_state) == old_state) { + return 0; + } else { +@@ -76,6 +78,8 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) + info->start_time = DirtyStat.start_time; + info->calc_time = DirtyStat.calc_time; + ++ trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState)); ++ + return info; + } + +@@ -123,6 +127,7 @@ static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, + crc = crc32(0, (info->ramblock_addr + + vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); + ++ trace_get_ramblock_vfn_hash(info->idstr, vfn, crc); + return crc; + } + +@@ -201,6 +206,8 @@ static bool skip_sample_ramblock(RAMBlock *block) + * Sample only blocks larger than MIN_RAMBLOCK_SIZE. 
+ */ + if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { ++ trace_skip_sample_ramblock(block->idstr, ++ qemu_ram_get_used_length(block)); + return true; + } + +@@ -260,6 +267,7 @@ static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) + for (i = 0; i < info->sample_pages_count; i++) { + crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); + if (crc != info->hash_result[i]) { ++ trace_calc_page_dirty_rate(info->idstr, crc, info->hash_result[i]); + info->sample_dirty_count++; + } + } +@@ -285,6 +293,7 @@ find_block_matched(RAMBlock *block, int count, + if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || + infos[i].ramblock_pages != + (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { ++ trace_find_page_matched(block->idstr); + return NULL; + } + +diff --git a/migration/trace-events b/migration/trace-events +index d8e54c367a..69620c43c2 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -296,3 +296,11 @@ dirty_bitmap_load_bits_zeroes(void) "" + dirty_bitmap_load_header(uint32_t flags) "flags 0x%x" + dirty_bitmap_load_enter(void) "" + dirty_bitmap_load_success(void) "" ++ ++# dirtyrate.c ++dirtyrate_set_state(const char *new_state) "new state %s" ++query_dirty_rate_info(const char *new_state) "current state %s" ++get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock name: %s, vfn: %"PRIu64 ", crc: %" PRIu32 ++calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32 ++skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64 ++find_page_matched(const char *idstr) "ramblock %s addr or size changed" +-- +2.27.0 + diff --git a/migration-dirtyrate-Compare-page-hash-results-for-re.patch b/migration-dirtyrate-Compare-page-hash-results-for-re.patch new file mode 100644 index 
0000000000000000000000000000000000000000..b9277d5faa65a35cc1d044023683ea37e452fc65 --- /dev/null +++ b/migration-dirtyrate-Compare-page-hash-results-for-re.patch @@ -0,0 +1,95 @@ +From 949612c5bbc5414970aed7d7ec9390a058ee2246 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:22:02 +0800 +Subject: [PATCH] migration/dirtyrate: Compare page hash results for recorded + sampled page + +Compare page hash results for recorded sampled page. + +Signed-off-by: Chuan Zheng +Signed-off-by: YanYing Zhuang +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-8-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. David Alan Gilbert +--- + migration/dirtyrate.c | 63 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 63 insertions(+) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index f93601f8ab..0412f825dc 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -177,6 +177,69 @@ out: + return ret; + } + ++static void calc_page_dirty_rate(struct RamblockDirtyInfo *info) ++{ ++ uint32_t crc; ++ int i; ++ ++ for (i = 0; i < info->sample_pages_count; i++) { ++ crc = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]); ++ if (crc != info->hash_result[i]) { ++ info->sample_dirty_count++; ++ } ++ } ++} ++ ++static struct RamblockDirtyInfo * ++find_block_matched(RAMBlock *block, int count, ++ struct RamblockDirtyInfo *infos) ++{ ++ int i; ++ struct RamblockDirtyInfo *matched; ++ ++ for (i = 0; i < count; i++) { ++ if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) { ++ break; ++ } ++ } ++ ++ if (i == count) { ++ return NULL; ++ } ++ ++ if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) || ++ infos[i].ramblock_pages != ++ (qemu_ram_get_used_length(block) >> TARGET_PAGE_BITS)) { ++ return NULL; ++ } ++ ++ matched = &infos[i]; ++ ++ return matched; ++} ++ ++static bool compare_page_hash_info(struct RamblockDirtyInfo *info, ++ int block_count) ++{ ++ struct 
RamblockDirtyInfo *block_dinfo = NULL; ++ RAMBlock *block = NULL; ++ ++ RAMBLOCK_FOREACH_MIGRATABLE(block) { ++ block_dinfo = find_block_matched(block, block_count, info); ++ if (block_dinfo == NULL) { ++ continue; ++ } ++ calc_page_dirty_rate(block_dinfo); ++ update_dirtyrate_stat(block_dinfo); ++ } ++ ++ if (DirtyStat.total_sample_count == 0) { ++ return false; ++ } ++ ++ return true; ++} ++ + static void calculate_dirtyrate(struct DirtyRateConfig config) + { + /* todo */ +-- +2.27.0 + diff --git a/migration-dirtyrate-Implement-calculate_dirtyrate-fu.patch b/migration-dirtyrate-Implement-calculate_dirtyrate-fu.patch new file mode 100644 index 0000000000000000000000000000000000000000..1fcb2c07c2bfd83d91ee582ec79c39d759c8335b --- /dev/null +++ b/migration-dirtyrate-Implement-calculate_dirtyrate-fu.patch @@ -0,0 +1,83 @@ +From 18102266fb18c4bfcdd4760e7111ca03a7520588 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:22:05 +0800 +Subject: [PATCH] migration/dirtyrate: Implement calculate_dirtyrate() function + +Implement calculate_dirtyrate() function. + +Signed-off-by: Chuan Zheng +Signed-off-by: YanYing Zhuang +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-11-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/dirtyrate.c | 45 +++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 43 insertions(+), 2 deletions(-) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 485d6467c9..c7a389a527 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -162,6 +162,21 @@ static void get_ramblock_dirty_info(RAMBlock *block, + strcpy(info->idstr, qemu_ram_get_idstr(block)); + } + ++static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count) ++{ ++ int i; ++ ++ if (!infos) { ++ return; ++ } ++ ++ for (i = 0; i < count; i++) { ++ g_free(infos[i].sample_page_vfn); ++ g_free(infos[i].hash_result); ++ } ++ g_free(infos); ++} ++ + static bool skip_sample_ramblock(RAMBlock *block) + { + /* +@@ -287,8 +302,34 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, + + static void calculate_dirtyrate(struct DirtyRateConfig config) + { +- /* todo */ +- return; ++ struct RamblockDirtyInfo *block_dinfo = NULL; ++ int block_count = 0; ++ int64_t msec = 0; ++ int64_t initial_time; ++ ++ rcu_register_thread(); ++ reset_dirtyrate_stat(); ++ rcu_read_lock(); ++ initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { ++ goto out; ++ } ++ rcu_read_unlock(); ++ ++ msec = config.sample_period_seconds * 1000; ++ msec = set_sample_page_period(msec, initial_time); ++ ++ rcu_read_lock(); ++ if (!compare_page_hash_info(block_dinfo, block_count)) { ++ goto out; ++ } ++ ++ update_dirtyrate(msec); ++ ++out: ++ rcu_read_unlock(); ++ free_ramblock_dirty_info(block_dinfo, block_count); ++ rcu_unregister_thread(); + } + + void *get_dirtyrate_thread(void *arg) +-- +2.27.0 + diff --git a/migration-dirtyrate-Implement-qmp_cal_dirty_rate-qmp.patch b/migration-dirtyrate-Implement-qmp_cal_dirty_rate-qmp.patch new file mode 100644 index 0000000000000000000000000000000000000000..04893d36e579d9c4b78dfa9d1bd488d3a842cddb --- /dev/null +++ 
b/migration-dirtyrate-Implement-qmp_cal_dirty_rate-qmp.patch @@ -0,0 +1,164 @@ +From 1f5f7156988cee6e678eff253df0e79788c950d7 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:22:06 +0800 +Subject: [PATCH] migration/dirtyrate: Implement + qmp_cal_dirty_rate()/qmp_get_dirty_rate() function + +Implement qmp_cal_dirty_rate()/qmp_get_dirty_rate() function which could be called + +Signed-off-by: Chuan Zheng +Message-Id: <1600237327-33618-12-git-send-email-zhengchuan@huawei.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert + atomic function fixup + Wording fixup in migration.json based on Eric's review +--- + migration/dirtyrate.c | 62 +++++++++++++++++++++++++++++++++++++++++++ + qapi/migration.json | 50 ++++++++++++++++++++++++++++++++++ + 2 files changed, 112 insertions(+) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index c7a389a527..9d9155f8ab 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -61,6 +61,24 @@ static int dirtyrate_set_state(int *state, int old_state, int new_state) + } + } + ++static struct DirtyRateInfo *query_dirty_rate_info(void) ++{ ++ int64_t dirty_rate = DirtyStat.dirty_rate; ++ struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); ++ ++ if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { ++ info->dirty_rate = dirty_rate; ++ } else { ++ info->dirty_rate = -1; ++ } ++ ++ info->status = CalculatingState; ++ info->start_time = DirtyStat.start_time; ++ info->calc_time = DirtyStat.calc_time; ++ ++ return info; ++} ++ + static void reset_dirtyrate_stat(void) + { + DirtyStat.total_dirty_samples = 0; +@@ -318,6 +336,8 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) + + msec = config.sample_period_seconds * 1000; + msec = set_sample_page_period(msec, initial_time); ++ DirtyStat.start_time = initial_time / 1000; ++ DirtyStat.calc_time = msec / 1000; + + rcu_read_lock(); + if (!compare_page_hash_info(block_dinfo, block_count)) 
{ +@@ -353,3 +373,45 @@ void *get_dirtyrate_thread(void *arg) + } + return NULL; + } ++ ++void qmp_calc_dirty_rate(int64_t calc_time, Error **errp) ++{ ++ static struct DirtyRateConfig config; ++ QemuThread thread; ++ int ret; ++ ++ /* ++ * If the dirty rate is already being measured, don't attempt to start. ++ */ ++ if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) { ++ error_setg(errp, "the dirty rate is already being measured."); ++ return; ++ } ++ ++ if (!is_sample_period_valid(calc_time)) { ++ error_setg(errp, "calc-time is out of range[%d, %d].", ++ MIN_FETCH_DIRTYRATE_TIME_SEC, ++ MAX_FETCH_DIRTYRATE_TIME_SEC); ++ return; ++ } ++ ++ /* ++ * Init calculation state as unstarted. ++ */ ++ ret = dirtyrate_set_state(&CalculatingState, CalculatingState, ++ DIRTY_RATE_STATUS_UNSTARTED); ++ if (ret == -1) { ++ error_setg(errp, "init dirty rate calculation state failed."); ++ return; ++ } ++ ++ config.sample_period_seconds = calc_time; ++ config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES; ++ qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread, ++ (void *)&config, QEMU_THREAD_DETACHED); ++} ++ ++struct DirtyRateInfo *qmp_query_dirty_rate(Error **errp) ++{ ++ return query_dirty_rate_info(); ++} +diff --git a/qapi/migration.json b/qapi/migration.json +index fdddde0af7..76f5b42493 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1462,3 +1462,53 @@ + ## + { 'enum': 'DirtyRateStatus', + 'data': [ 'unstarted', 'measuring', 'measured'] } ++ ++## ++# @DirtyRateInfo: ++# ++# Information about current dirty page rate of vm. ++# ++# @dirty-rate: @dirtyrate describing the dirty page rate of vm ++# in units of MB/s. ++# If this field returns '-1', it means querying has not ++# yet started or completed. 
++# ++# @status: status containing dirtyrate query status includes ++# 'unstarted' or 'measuring' or 'measured' ++# ++# @start-time: start time in units of second for calculation ++# ++# @calc-time: time in units of second for sample dirty pages ++# ++# Since: 5.2 ++# ++## ++{ 'struct': 'DirtyRateInfo', ++ 'data': {'dirty-rate': 'int64', ++ 'status': 'DirtyRateStatus', ++ 'start-time': 'int64', ++ 'calc-time': 'int64'} } ++ ++## ++# @calc-dirty-rate: ++# ++# start calculating dirty page rate for vm ++# ++# @calc-time: time in units of second for sample dirty pages ++# ++# Since: 5.2 ++# ++# Example: ++# {"command": "calc-dirty-rate", "data": {"calc-time": 1} } ++# ++## ++{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} } ++ ++## ++# @query-dirty-rate: ++# ++# query dirty page rate in units of MB/s for vm ++# ++# Since: 5.2 ++## ++{ 'command': 'query-dirty-rate', 'returns': 'DirtyRateInfo' } +-- +2.27.0 + diff --git a/migration-dirtyrate-Implement-set_sample_page_period.patch b/migration-dirtyrate-Implement-set_sample_page_period.patch new file mode 100644 index 0000000000000000000000000000000000000000..fdb9c22431a0d74a4055f01839a42beaa2fa1f51 --- /dev/null +++ b/migration-dirtyrate-Implement-set_sample_page_period.patch @@ -0,0 +1,75 @@ +From 905082a502e0600d40e784df2443ae99948cf52d Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:22:04 +0800 +Subject: [PATCH] migration/dirtyrate: Implement set_sample_page_period() and + is_sample_period_valid() + +Implement is_sample_period_valid() to check if the sample period is vaild and +do set_sample_page_period() to sleep specific time between sample actions. + +Signed-off-by: Chuan Zheng +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: David Edmondson +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-10-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/dirtyrate.c | 24 ++++++++++++++++++++++++ + migration/dirtyrate.h | 6 ++++++ + 2 files changed, 30 insertions(+) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 97bb883850..485d6467c9 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -27,6 +27,30 @@ + static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; + static struct DirtyRateStat DirtyStat; + ++static int64_t set_sample_page_period(int64_t msec, int64_t initial_time) ++{ ++ int64_t current_time; ++ ++ current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ if ((current_time - initial_time) >= msec) { ++ msec = current_time - initial_time; ++ } else { ++ g_usleep((msec + initial_time - current_time) * 1000); ++ } ++ ++ return msec; ++} ++ ++static bool is_sample_period_valid(int64_t sec) ++{ ++ if (sec < MIN_FETCH_DIRTYRATE_TIME_SEC || ++ sec > MAX_FETCH_DIRTYRATE_TIME_SEC) { ++ return false; ++ } ++ ++ return true; ++} ++ + static int dirtyrate_set_state(int *state, int old_state, int new_state) + { + assert(new_state < DIRTY_RATE_STATUS__MAX); +diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h +index be5b8ec2b1..6ec429534d 100644 +--- a/migration/dirtyrate.h ++++ b/migration/dirtyrate.h +@@ -29,6 +29,12 @@ + */ + #define MIN_RAMBLOCK_SIZE 128 + ++/* ++ * Take 1s as minimum time for calculation duration ++ */ ++#define MIN_FETCH_DIRTYRATE_TIME_SEC 1 ++#define MAX_FETCH_DIRTYRATE_TIME_SEC 60 ++ + struct DirtyRateConfig { + uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ + int64_t sample_period_seconds; /* time duration between two sampling */ +-- +2.27.0 + diff --git a/migration-dirtyrate-Record-hash-results-for-each-sam.patch b/migration-dirtyrate-Record-hash-results-for-each-sam.patch new file mode 100644 index 0000000000000000000000000000000000000000..5a5a8a9476155c56c740941143b899e80b5a2472 --- /dev/null +++ b/migration-dirtyrate-Record-hash-results-for-each-sam.patch @@ -0,0 +1,149 @@ +From 
751dbc44b4ac0e0c0bce2f53d2ee79a6e6318188 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:22:01 +0800 +Subject: [PATCH] migration/dirtyrate: Record hash results for each sampled + page + +Record hash results for each sampled page, crc32 is taken to calculate +hash results for each sampled length in TARGET_PAGE_SIZE. + +Signed-off-by: Chuan Zheng +Signed-off-by: YanYing Zhuang +Reviewed-by: David Edmondson +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-7-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. David Alan Gilbert +--- + migration/dirtyrate.c | 109 ++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 109 insertions(+) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 1ccc71077d..f93601f8ab 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -10,6 +10,7 @@ + * See the COPYING file in the top-level directory. + */ + ++#include <zlib.h> + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "cpu.h" +@@ -68,6 +69,114 @@ static void update_dirtyrate(uint64_t msec) + DirtyStat.dirty_rate = dirtyrate; + } + ++/* ++ * get hash result for the sampled memory with length of TARGET_PAGE_SIZE ++ * in ramblock, which starts from ramblock base address. 
++ */ ++static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info, ++ uint64_t vfn) ++{ ++ uint32_t crc; ++ ++ crc = crc32(0, (info->ramblock_addr + ++ vfn * TARGET_PAGE_SIZE), TARGET_PAGE_SIZE); ++ ++ return crc; ++} ++ ++static bool save_ramblock_hash(struct RamblockDirtyInfo *info) ++{ ++ unsigned int sample_pages_count; ++ int i; ++ GRand *rand; ++ ++ sample_pages_count = info->sample_pages_count; ++ ++ /* ramblock size less than one page, return success to skip this ramblock */ ++ if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) { ++ return true; ++ } ++ ++ info->hash_result = g_try_malloc0_n(sample_pages_count, ++ sizeof(uint32_t)); ++ if (!info->hash_result) { ++ return false; ++ } ++ ++ info->sample_page_vfn = g_try_malloc0_n(sample_pages_count, ++ sizeof(uint64_t)); ++ if (!info->sample_page_vfn) { ++ g_free(info->hash_result); ++ return false; ++ } ++ ++ rand = g_rand_new(); ++ for (i = 0; i < sample_pages_count; i++) { ++ info->sample_page_vfn[i] = g_rand_int_range(rand, 0, ++ info->ramblock_pages - 1); ++ info->hash_result[i] = get_ramblock_vfn_hash(info, ++ info->sample_page_vfn[i]); ++ } ++ g_rand_free(rand); ++ ++ return true; ++} ++ ++static void get_ramblock_dirty_info(RAMBlock *block, ++ struct RamblockDirtyInfo *info, ++ struct DirtyRateConfig *config) ++{ ++ uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes; ++ ++ /* Right shift 30 bits to calc ramblock size in GB */ ++ info->sample_pages_count = (qemu_ram_get_used_length(block) * ++ sample_pages_per_gigabytes) >> 30; ++ /* Right shift TARGET_PAGE_BITS to calc page count */ ++ info->ramblock_pages = qemu_ram_get_used_length(block) >> ++ TARGET_PAGE_BITS; ++ info->ramblock_addr = qemu_ram_get_host_addr(block); ++ strcpy(info->idstr, qemu_ram_get_idstr(block)); ++} ++ ++static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, ++ struct DirtyRateConfig config, ++ int *block_count) ++{ ++ struct RamblockDirtyInfo *info = NULL; 
++ struct RamblockDirtyInfo *dinfo = NULL; ++ RAMBlock *block = NULL; ++ int total_count = 0; ++ int index = 0; ++ bool ret = false; ++ ++ RAMBLOCK_FOREACH_MIGRATABLE(block) { ++ total_count++; ++ } ++ ++ dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo)); ++ if (dinfo == NULL) { ++ goto out; ++ } ++ ++ RAMBLOCK_FOREACH_MIGRATABLE(block) { ++ if (index >= total_count) { ++ break; ++ } ++ info = &dinfo[index]; ++ get_ramblock_dirty_info(block, info, &config); ++ if (!save_ramblock_hash(info)) { ++ goto out; ++ } ++ index++; ++ } ++ ret = true; ++ ++out: ++ *block_count = index; ++ *block_dinfo = dinfo; ++ return ret; ++} ++ + static void calculate_dirtyrate(struct DirtyRateConfig config) + { + /* todo */ +-- +2.27.0 + diff --git a/migration-dirtyrate-add-DirtyRateStatus-to-denote-ca.patch b/migration-dirtyrate-add-DirtyRateStatus-to-denote-ca.patch new file mode 100644 index 0000000000000000000000000000000000000000..e0ebb2a70e3771da65a340b081094e63318d42fe --- /dev/null +++ b/migration-dirtyrate-add-DirtyRateStatus-to-denote-ca.patch @@ -0,0 +1,93 @@ +From 466b3eee340f022e53478e706e8d4dc02136b1e1 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:21:57 +0800 +Subject: [PATCH] migration/dirtyrate: add DirtyRateStatus to denote + calculation status + +add DirtyRateStatus to denote calculating status. + +Signed-off-by: Chuan Zheng +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-3-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert + atomic name fixup +--- + migration/dirtyrate.c | 26 ++++++++++++++++++++++++++ + qapi/migration.json | 17 +++++++++++++++++ + 2 files changed, 43 insertions(+) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 29ef663acb..44a60bf10d 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -22,6 +22,19 @@ + #include "migration.h" + #include "dirtyrate.h" + ++static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; ++ ++static int dirtyrate_set_state(int *state, int old_state, int new_state) ++{ ++ assert(new_state < DIRTY_RATE_STATUS__MAX); ++ if (atomic_cmpxchg(state, old_state, new_state) == old_state) { ++ return 0; ++ } else { ++ return -1; ++ } ++} ++ ++ + static void calculate_dirtyrate(struct DirtyRateConfig config) + { + /* todo */ +@@ -31,8 +44,21 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) + void *get_dirtyrate_thread(void *arg) + { + struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; ++ int ret; ++ ++ ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, ++ DIRTY_RATE_STATUS_MEASURING); ++ if (ret == -1) { ++ error_report("change dirtyrate state failed."); ++ return NULL; ++ } + + calculate_dirtyrate(config); + ++ ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, ++ DIRTY_RATE_STATUS_MEASURED); ++ if (ret == -1) { ++ error_report("change dirtyrate state failed."); ++ } + return NULL; + } +diff --git a/qapi/migration.json b/qapi/migration.json +index 9cfbaf8c6c..fdddde0af7 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1445,3 +1445,20 @@ + # Since: 3.0 + ## + { 'command': 'migrate-pause', 'allow-oob': true } ++ ++## ++# @DirtyRateStatus: ++# ++# An enumeration of dirtyrate status. ++# ++# @unstarted: the dirtyrate thread has not been started. ++# ++# @measuring: the dirtyrate thread is measuring. ++# ++# @measured: the dirtyrate thread has measured and results are available. 
++# ++# Since: 5.2 ++# ++## ++{ 'enum': 'DirtyRateStatus', ++ 'data': [ 'unstarted', 'measuring', 'measured'] } +-- +2.27.0 + diff --git a/migration-dirtyrate-move-RAMBLOCK_FOREACH_MIGRATABLE.patch b/migration-dirtyrate-move-RAMBLOCK_FOREACH_MIGRATABLE.patch new file mode 100644 index 0000000000000000000000000000000000000000..16660d7dd46f4ec3712f9714de37c41a3c3554f4 --- /dev/null +++ b/migration-dirtyrate-move-RAMBLOCK_FOREACH_MIGRATABLE.patch @@ -0,0 +1,84 @@ +From 1cee10fe37193c6b5ed4e765a2a6d1e6c1411922 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:22:00 +0800 +Subject: [PATCH] migration/dirtyrate: move RAMBLOCK_FOREACH_MIGRATABLE into + ram.h + +RAMBLOCK_FOREACH_MIGRATABLE is need in dirtyrate measure, +move the existing definition up into migration/ram.h + +Signed-off-by: Chuan Zheng +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: David Edmondson +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-6-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/dirtyrate.c | 1 + + migration/ram.c | 11 +---------- + migration/ram.h | 10 ++++++++++ + 3 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index cbb323d6ec..1ccc71077d 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -20,6 +20,7 @@ + #include "qemu/rcu_queue.h" + #include "qapi/qapi-commands-migration.h" + #include "migration.h" ++#include "ram.h" + #include "dirtyrate.h" + + static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED; +diff --git a/migration/ram.c b/migration/ram.c +index 848059d9fb..1a33c7b3e2 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -159,21 +159,12 @@ out: + return ret; + } + +-static bool ramblock_is_ignored(RAMBlock *block) ++bool ramblock_is_ignored(RAMBlock *block) + { + return !qemu_ram_is_migratable(block) || + (migrate_ignore_shared() && qemu_ram_is_shared(block)); + } + +-/* Should be holding either ram_list.mutex, or the RCU lock. */ +-#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ +- INTERNAL_RAMBLOCK_FOREACH(block) \ +- if (ramblock_is_ignored(block)) {} else +- +-#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ +- INTERNAL_RAMBLOCK_FOREACH(block) \ +- if (!qemu_ram_is_migratable(block)) {} else +- + #undef RAMBLOCK_FOREACH + + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque) +diff --git a/migration/ram.h b/migration/ram.h +index a788ff0e8e..565ec86b1f 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -37,6 +37,16 @@ extern MigrationStats ram_counters; + extern XBZRLECacheStats xbzrle_counters; + extern CompressionStats compression_counters; + ++bool ramblock_is_ignored(RAMBlock *block); ++/* Should be holding either ram_list.mutex, or the RCU lock. 
*/ ++#define RAMBLOCK_FOREACH_NOT_IGNORED(block) \ ++ INTERNAL_RAMBLOCK_FOREACH(block) \ ++ if (ramblock_is_ignored(block)) {} else ++ ++#define RAMBLOCK_FOREACH_MIGRATABLE(block) \ ++ INTERNAL_RAMBLOCK_FOREACH(block) \ ++ if (!qemu_ram_is_migratable(block)) {} else ++ + int xbzrle_cache_resize(int64_t new_size, Error **errp); + uint64_t ram_bytes_remaining(void); + uint64_t ram_bytes_total(void); +-- +2.27.0 + diff --git a/migration-dirtyrate-present-dirty-rate-only-when-que.patch b/migration-dirtyrate-present-dirty-rate-only-when-que.patch new file mode 100644 index 0000000000000000000000000000000000000000..d6d5dd423913c89888c29e530b18009bf1f63f50 --- /dev/null +++ b/migration-dirtyrate-present-dirty-rate-only-when-que.patch @@ -0,0 +1,69 @@ +From ba399ad806d195f31d0b76fa55363a4147459a5b Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Tue, 29 Sep 2020 11:42:18 +0800 +Subject: [PATCH] migration/dirtyrate: present dirty rate only when querying + the rate has completed + +Make dirty_rate field optional, present dirty rate only when querying +the rate has completed. +The qmp results is shown as follow: +@unstarted: +{"return":{"status":"unstarted","start-time":0,"calc-time":0},"id":"libvirt-12"} +@measuring: +{"return":{"status":"measuring","start-time":102931,"calc-time":1},"id":"libvirt-85"} +@measured: +{"return":{"status":"measured","dirty-rate":4,"start-time":150146,"calc-time":1},"id":"libvirt-15"} + +Signed-off-by: Chuan Zheng +Reviewed-by: David Edmondson +Message-Id: <1601350938-128320-3-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/dirtyrate.c | 3 +-- + qapi/migration.json | 8 +++----- + 2 files changed, 4 insertions(+), 7 deletions(-) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index f1c007d569..00c8085456 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -69,9 +69,8 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) + struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo)); + + if (atomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) { ++ info->has_dirty_rate = true; + info->dirty_rate = dirty_rate; +- } else { +- info->dirty_rate = -1; + } + + info->status = CalculatingState; +diff --git a/qapi/migration.json b/qapi/migration.json +index 76f5b42493..6844ddfab3 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1468,10 +1468,8 @@ + # + # Information about current dirty page rate of vm. + # +-# @dirty-rate: @dirtyrate describing the dirty page rate of vm +-# in units of MB/s. +-# If this field returns '-1', it means querying has not +-# yet started or completed. ++# @dirty-rate: an estimate of the dirty page rate of the VM in units of ++# MB/s, present only when estimating the rate has completed. 
+ # + # @status: status containing dirtyrate query status includes + # 'unstarted' or 'measuring' or 'measured' +@@ -1484,7 +1482,7 @@ + # + ## + { 'struct': 'DirtyRateInfo', +- 'data': {'dirty-rate': 'int64', ++ 'data': {'*dirty-rate': 'int64', + 'status': 'DirtyRateStatus', + 'start-time': 'int64', + 'calc-time': 'int64'} } +-- +2.27.0 + diff --git a/migration-dirtyrate-record-start_time-and-calc_time-.patch b/migration-dirtyrate-record-start_time-and-calc_time-.patch new file mode 100644 index 0000000000000000000000000000000000000000..a4a4fed2c90eb99565d7712a6565c2284331b29c --- /dev/null +++ b/migration-dirtyrate-record-start_time-and-calc_time-.patch @@ -0,0 +1,71 @@ +From 5de3e40a6c1a4afcc2612ac109326956e7cded63 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Tue, 29 Sep 2020 11:42:17 +0800 +Subject: [PATCH] migration/dirtyrate: record start_time and calc_time while at + the measuring state + +Querying could include both the start-time and the calc-time while at the measuring +state, allowing a caller to determine when they should expect to come back looking +for a result. + +Signed-off-by: Chuan Zheng +Message-Id: <1601350938-128320-2-git-send-email-zhengchuan@huawei.com> +Reviewed-by: David Edmondson +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/dirtyrate.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 80936a4ca6..f1c007d569 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -83,14 +83,14 @@ static struct DirtyRateInfo *query_dirty_rate_info(void) + return info; + } + +-static void reset_dirtyrate_stat(void) ++static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time) + { + DirtyStat.total_dirty_samples = 0; + DirtyStat.total_sample_count = 0; + DirtyStat.total_block_mem_MB = 0; + DirtyStat.dirty_rate = -1; +- DirtyStat.start_time = 0; +- DirtyStat.calc_time = 0; ++ DirtyStat.start_time = start_time; ++ DirtyStat.calc_time = calc_time; + } + + static void update_dirtyrate_stat(struct RamblockDirtyInfo *info) +@@ -335,7 +335,6 @@ static void calculate_dirtyrate(struct DirtyRateConfig config) + int64_t initial_time; + + rcu_register_thread(); +- reset_dirtyrate_stat(); + rcu_read_lock(); + initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) { +@@ -365,6 +364,8 @@ void *get_dirtyrate_thread(void *arg) + { + struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; + int ret; ++ int64_t start_time; ++ int64_t calc_time; + + ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED, + DIRTY_RATE_STATUS_MEASURING); +@@ -373,6 +374,10 @@ void *get_dirtyrate_thread(void *arg) + return NULL; + } + ++ start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000; ++ calc_time = config.sample_period_seconds; ++ init_dirtyrate_stat(start_time, calc_time); ++ + calculate_dirtyrate(config); + + ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING, +-- +2.27.0 + diff --git a/migration-dirtyrate-setup-up-query-dirtyrate-framwor.patch b/migration-dirtyrate-setup-up-query-dirtyrate-framwor.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f4a2b4ff12bf88e0f37211df631867d0ee6f6a6d --- /dev/null +++ b/migration-dirtyrate-setup-up-query-dirtyrate-framwor.patch @@ -0,0 +1,116 @@ +From 18dbd0efc14aa190b2f4c364fa614b0994af5af0 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:21:56 +0800 +Subject: [PATCH] migration/dirtyrate: setup up query-dirtyrate framwork + +Add get_dirtyrate_thread() functions to setup query-dirtyrate +framework. + +Signed-off-by: Chuan Zheng +Signed-off-by: YanYing Zhuang +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: David Edmondson +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-2-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. David Alan Gilbert +--- + Makefile.target | 1 + + migration/dirtyrate.c | 38 ++++++++++++++++++++++++++++++++++++++ + migration/dirtyrate.h | 28 ++++++++++++++++++++++++++++ + 3 files changed, 67 insertions(+) + create mode 100644 migration/dirtyrate.c + create mode 100644 migration/dirtyrate.h + +diff --git a/Makefile.target b/Makefile.target +index 933b27453a..5ea840964c 100644 +--- a/Makefile.target ++++ b/Makefile.target +@@ -161,6 +161,7 @@ obj-y += qapi/ + obj-y += memory.o + obj-y += memory_mapping.o + obj-y += migration/ram.o ++obj-y += migration/dirtyrate.o + LIBS := $(libs_softmmu) $(LIBS) + + # Hardware support +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +new file mode 100644 +index 0000000000..29ef663acb +--- /dev/null ++++ b/migration/dirtyrate.c +@@ -0,0 +1,38 @@ ++/* ++ * Dirtyrate implement code ++ * ++ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD. ++ * ++ * Authors: ++ * Chuan Zheng ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++ ++#include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "cpu.h" ++#include "qemu/config-file.h" ++#include "exec/memory.h" ++#include "exec/ram_addr.h" ++#include "exec/target_page.h" ++#include "qemu/rcu_queue.h" ++#include "qapi/qapi-commands-migration.h" ++#include "migration.h" ++#include "dirtyrate.h" ++ ++static void calculate_dirtyrate(struct DirtyRateConfig config) ++{ ++ /* todo */ ++ return; ++} ++ ++void *get_dirtyrate_thread(void *arg) ++{ ++ struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg; ++ ++ calculate_dirtyrate(config); ++ ++ return NULL; ++} +diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h +new file mode 100644 +index 0000000000..84ab9409ac +--- /dev/null ++++ b/migration/dirtyrate.h +@@ -0,0 +1,28 @@ ++/* ++ * Dirtyrate common functions ++ * ++ * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO., LTD. ++ * ++ * Authors: ++ * Chuan Zheng ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#ifndef QEMU_MIGRATION_DIRTYRATE_H ++#define QEMU_MIGRATION_DIRTYRATE_H ++ ++/* ++ * Sample 512 pages per GB as default. ++ * TODO: Make it configurable. 
++ */ ++#define DIRTYRATE_DEFAULT_SAMPLE_PAGES 512 ++ ++struct DirtyRateConfig { ++ uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ ++ int64_t sample_period_seconds; /* time duration between two sampling */ ++}; ++ ++void *get_dirtyrate_thread(void *arg); ++#endif +-- +2.27.0 + diff --git a/migration-dirtyrate-simplify-includes-in-dirtyrate.c.patch b/migration-dirtyrate-simplify-includes-in-dirtyrate.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..3bdb51b5d936d3a51bc9321815f98b742070b3ab --- /dev/null +++ b/migration-dirtyrate-simplify-includes-in-dirtyrate.c.patch @@ -0,0 +1,43 @@ +From 91eed005e1af25f49ab38732cd3c9ea8071331b0 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Fri, 30 Oct 2020 11:58:01 +0800 +Subject: [PATCH] migration/dirtyrate: simplify includes in dirtyrate.c + +Remove redundant blank line which is left by Commit 662770af7c6e8c, +also take this opportunity to remove redundant includes in dirtyrate.c. + +Signed-off-by: Chuan Zheng +Message-Id: <1604030281-112946-1-git-send-email-zhengchuan@huawei.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +--- + migration/dirtyrate.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 00c8085456..9a6d0e2cc6 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -10,17 +10,16 @@ + * See the COPYING file in the top-level directory. 
+ */ + +-#include + #include "qemu/osdep.h" ++#include + #include "qapi/error.h" + #include "cpu.h" +-#include "qemu/config-file.h" + #include "exec/memory.h" + #include "exec/ram_addr.h" + #include "exec/target_page.h" + #include "qemu/rcu_queue.h" ++#include "qemu/error-report.h" + #include "qapi/qapi-commands-migration.h" +-#include "migration.h" + #include "ram.h" + #include "trace.h" + #include "dirtyrate.h" +-- +2.27.0 + diff --git a/migration-dirtyrate-skip-sampling-ramblock-with-size.patch b/migration-dirtyrate-skip-sampling-ramblock-with-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e649e3cdef1a3283d1ed0fde909902d5f3274a7 --- /dev/null +++ b/migration-dirtyrate-skip-sampling-ramblock-with-size.patch @@ -0,0 +1,92 @@ +From 0fcff073292e78e08ee24eb784783156b2974f4a Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 16 Sep 2020 14:22:03 +0800 +Subject: [PATCH] migration/dirtyrate: skip sampling ramblock with size below + MIN_RAMBLOCK_SIZE + +In order to sample real RAM, skip ramblock with size below MIN_RAMBLOCK_SIZE +which is set as 128M. + +Signed-off-by: Chuan Zheng +Reviewed-by: David Edmondson +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Li Qiang +Message-Id: <1600237327-33618-9-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. David Alan Gilbert +--- + migration/dirtyrate.c | 21 +++++++++++++++++++++ + migration/dirtyrate.h | 5 +++++ + 2 files changed, 26 insertions(+) + +diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c +index 0412f825dc..97bb883850 100644 +--- a/migration/dirtyrate.c ++++ b/migration/dirtyrate.c +@@ -138,6 +138,18 @@ static void get_ramblock_dirty_info(RAMBlock *block, + strcpy(info->idstr, qemu_ram_get_idstr(block)); + } + ++static bool skip_sample_ramblock(RAMBlock *block) ++{ ++ /* ++ * Sample only blocks larger than MIN_RAMBLOCK_SIZE. 
++ */ ++ if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) { ++ return true; ++ } ++ ++ return false; ++} ++ + static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, + struct DirtyRateConfig config, + int *block_count) +@@ -150,6 +162,9 @@ static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, + bool ret = false; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { ++ if (skip_sample_ramblock(block)) { ++ continue; ++ } + total_count++; + } + +@@ -159,6 +174,9 @@ static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo, + } + + RAMBLOCK_FOREACH_MIGRATABLE(block) { ++ if (skip_sample_ramblock(block)) { ++ continue; ++ } + if (index >= total_count) { + break; + } +@@ -225,6 +243,9 @@ static bool compare_page_hash_info(struct RamblockDirtyInfo *info, + RAMBlock *block = NULL; + + RAMBLOCK_FOREACH_MIGRATABLE(block) { ++ if (skip_sample_ramblock(block)) { ++ continue; ++ } + block_dinfo = find_block_matched(block, block_count, info); + if (block_dinfo == NULL) { + continue; +diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h +index 312debca6f..be5b8ec2b1 100644 +--- a/migration/dirtyrate.h ++++ b/migration/dirtyrate.h +@@ -24,6 +24,11 @@ + */ + #define RAMBLOCK_INFO_MAX_LEN 256 + ++/* ++ * Minimum RAMBlock size to sample, in megabytes. 
++ */ ++#define MIN_RAMBLOCK_SIZE 128 ++ + struct DirtyRateConfig { + uint64_t sample_pages_per_gigabytes; /* sample pages per GB */ + int64_t sample_period_seconds; /* time duration between two sampling */ +-- +2.27.0 + diff --git a/migration-fix-COLO-broken-caused-by-a-previous-commi.patch b/migration-fix-COLO-broken-caused-by-a-previous-commi.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ac65d9c79a43e8233c80d633dc02835e9f8344c --- /dev/null +++ b/migration-fix-COLO-broken-caused-by-a-previous-commi.patch @@ -0,0 +1,39 @@ +From c635692b4e75db3f9547f6d4ed9d73d1cdb34989 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 14:43:45 +0800 +Subject: [PATCH] migration: fix COLO broken caused by a previous commit + +This commit "migration: Create migration_is_running()" broke +COLO. Becuase there is a process broken by this commit. + +colo_process_checkpoint + ->colo_do_checkpoint_transaction + ->migrate_set_block_enabled + ->qmp_migrate_set_capabilities + +It can be fixed by make COLO process as an exception, +Maybe we need a better way to fix it. 
+ +Cc: Juan Quintela +Signed-off-by: zhanghailiang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + migration/migration.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 923a1d9d3f..0e396f22b4 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -833,7 +833,6 @@ bool migration_is_running(int state) + case MIGRATION_STATUS_PRE_SWITCHOVER: + case MIGRATION_STATUS_DEVICE: + case MIGRATION_STATUS_CANCELLING: +- case MIGRATION_STATUS_COLO: + return true; + + default: +-- +2.27.0 + diff --git a/migration-fix-cleanup_bh-leak-on-resume.patch b/migration-fix-cleanup_bh-leak-on-resume.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b75ed01b8faa4c3d5b9d1e17e6d3d205daa2396 --- /dev/null +++ b/migration-fix-cleanup_bh-leak-on-resume.patch @@ -0,0 +1,64 @@ +From 1d7c227bbb24665cea03f96a984ad6be223ac40c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 25 Mar 2020 19:47:21 +0100 +Subject: [PATCH 2/5] migration: fix cleanup_bh leak on resume +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Since commit 8c6b0356b53977bcfdea5299db07884915425b0c ("util/async: +make bh_aio_poll() O(1)"), migration-test reveals a leak: + +QTEST_QEMU_BINARY=x86_64-softmmu/qemu-system-x86_64 +tests/qtest/migration-test -p /x86_64/migration/postcopy/recovery +tests/qtest/libqtest.c:140: kill_qemu() tried to terminate QEMU +process but encountered exit status 1 (expected 0) + +================================================================= +==2082571==ERROR: LeakSanitizer: detected memory leaks + +Direct leak of 40 byte(s) in 1 object(s) allocated from: + #0 0x7f25971dfc58 in __interceptor_malloc (/lib64/libasan.so.5+0x10dc58) + #1 0x7f2596d08358 in g_malloc (/lib64/libglib-2.0.so.0+0x57358) + #2 0x560970d006f8 in qemu_bh_new /home/elmarco/src/qemu/util/main-loop.c:532 + #3 0x5609704afa02 in migrate_fd_connect 
+/home/elmarco/src/qemu/migration/migration.c:3407 + #4 0x5609704b6b6f in migration_channel_connect +/home/elmarco/src/qemu/migration/channel.c:92 + #5 0x5609704b2bfb in socket_outgoing_migration +/home/elmarco/src/qemu/migration/socket.c:108 + #6 0x560970b9bd6c in qio_task_complete /home/elmarco/src/qemu/io/task.c:196 + #7 0x560970b9aa97 in qio_task_thread_result +/home/elmarco/src/qemu/io/task.c:111 + #8 0x7f2596cfee3a (/lib64/libglib-2.0.so.0+0x4de3a) + +Signed-off-by: Marc-André Lureau +Message-Id: <20200325184723.2029630-2-marcandre.lureau@redhat.com> +Reviewed-by: Juan Quintela +Signed-off-by: Paolo Bonzini +Signed-off-by: Zhenyu Ye +--- + migration/migration.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8f2fc2b4..7949f2a4 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3313,7 +3313,12 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED; + + s->expected_downtime = s->parameters.downtime_limit; +- s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); ++ if (resume) { ++ assert(s->cleanup_bh); ++ } else { ++ assert(!s->cleanup_bh); ++ s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s); ++ } + if (error_in) { + migrate_fd_error(s, error_in); + migrate_fd_cleanup(s); +-- +2.22.0.windows.1 + diff --git a/migration-fix-memory-leak-in-qmp_migrate_set_paramet.patch b/migration-fix-memory-leak-in-qmp_migrate_set_paramet.patch new file mode 100644 index 0000000000000000000000000000000000000000..46775ae5ee200005e5d56f10ccd2c02e75685c7a --- /dev/null +++ b/migration-fix-memory-leak-in-qmp_migrate_set_paramet.patch @@ -0,0 +1,78 @@ +From d65b5b20f4ada9e6c5af37b0fb59fa4709c4bdc9 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Fri, 5 Mar 2021 16:06:52 +0800 +Subject: [PATCH] migration: fix memory leak in qmp_migrate_set_parameters + +"tmp.tls_hostname" and "tmp.tls_creds" allocated by 
migrate_params_test_apply() +is forgot to free at the end of qmp_migrate_set_parameters(). Fix that. + +The leak stack: +Direct leak of 2 byte(s) in 2 object(s) allocated from: + #0 0xffffb597c20b in __interceptor_malloc (/usr/lib64/libasan.so.4+0xd320b) + #1 0xffffb52dcb1b in g_malloc (/usr/lib64/libglib-2.0.so.0+0x58b1b) + #2 0xffffb52f8143 in g_strdup (/usr/lib64/libglib-2.0.so.0+0x74143) + #3 0xaaaac52447fb in migrate_params_test_apply (/usr/src/debug/qemu-4.1.0/migration/migration.c:1377) + #4 0xaaaac52fdca7 in qmp_migrate_set_parameters (/usr/src/debug/qemu-4.1.0/qapi/qapi-commands-migration.c:192) + #5 0xaaaac551d543 in qmp_dispatch (/usr/src/debug/qemu-4.1.0/qapi/qmp-dispatch.c:165) + #6 0xaaaac52a0a8f in qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:125) + #7 0xaaaac52a1c7f in monitor_qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:214) + #8 0xaaaac55cb0cf in aio_bh_call (/usr/src/debug/qemu-4.1.0/util/async.c:117) + #9 0xaaaac55d4543 in aio_bh_poll (/usr/src/debug/qemu-4.1.0/util/aio-posix.c:459) + #10 0xaaaac55cae0f in aio_dispatch (/usr/src/debug/qemu-4.1.0/util/async.c:268) + #11 0xffffb52d6a7b in g_main_context_dispatch (/usr/lib64/libglib-2.0.so.0+0x52a7b) + #12 0xaaaac55d1e3b(/usr/bin/qemu-kvm-4.1.0+0x1622e3b) + #13 0xaaaac4e314bb(/usr/bin/qemu-kvm-4.1.0+0xe824bb) + #14 0xaaaac47f45ef(/usr/bin/qemu-kvm-4.1.0+0x8455ef) + #15 0xffffb4bfef3f in __libc_start_main (/usr/lib64/libc.so.6+0x23f3f) + #16 0xaaaac47ffacb(/usr/bin/qemu-kvm-4.1.0+0x850acb) + +Direct leak of 2 byte(s) in 2 object(s) allocated from: + #0 0xffffb597c20b in __interceptor_malloc (/usr/lib64/libasan.so.4+0xd320b) + #1 0xffffb52dcb1b in g_malloc (/usr/lib64/libglib-2.0.so.0+0x58b1b) + #2 0xffffb52f8143 in g_strdup (/usr/lib64/libglib-2.0.so.0+0x74143) + #3 0xaaaac5244893 in migrate_params_test_apply (/usr/src/debug/qemu-4.1.0/migration/migration.c:1382) + #4 0xaaaac52fdca7 in qmp_migrate_set_parameters (/usr/src/debug/qemu-4.1.0/qapi/qapi-commands-migration.c:192) + #5 
0xaaaac551d543 in qmp_dispatch (/usr/src/debug/qemu-4.1.0/qapi/qmp-dispatch.c) + #6 0xaaaac52a0a8f in qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:125) + #7 0xaaaac52a1c7f in monitor_qmp_dispatch (/usr/src/debug/qemu-4.1.0/monitor/qmp.c:214) + #8 0xaaaac55cb0cf in aio_bh_call (/usr/src/debug/qemu-4.1.0/util/async.c:117) + #9 0xaaaac55d4543 in aio_bh_poll (/usr/src/debug/qemu-4.1.0/util/aio-posix.c:459) + #10 0xaaaac55cae0f in in aio_dispatch (/usr/src/debug/qemu-4.1.0/util/async.c:268) + #11 0xffffb52d6a7b in g_main_context_dispatch (/usr/lib64/libglib-2.0.so.0+0x52a7b) + #12 0xaaaac55d1e3b(/usr/bin/qemu-kvm-4.1.0+0x1622e3b) + #13 0xaaaac4e314bb(/usr/bin/qemu-kvm-4.1.0+0xe824bb) + #14 0xaaaac47f45ef (/usr/bin/qemu-kvm-4.1.0+0x8455ef) + #15 0xffffb4bfef3f in __libc_start_main (/usr/lib64/libc.so.6+0x23f3f) + #16 0xaaaac47ffacb(/usr/bin/qemu-kvm-4.1.0+0x850acb) + +Signed-off-by: Chuan Zheng +Reviewed-by: KeQian Zhu +Reviewed-by: HaiLiang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + migration/migration.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 17a5c16c79..9b40380d7c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1291,12 +1291,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + + if (params->has_tls_creds) { + assert(params->tls_creds->type == QTYPE_QSTRING); +- dest->tls_creds = g_strdup(params->tls_creds->u.s); ++ dest->tls_creds = params->tls_creds->u.s; + } + + if (params->has_tls_hostname) { + assert(params->tls_hostname->type == QTYPE_QSTRING); +- dest->tls_hostname = g_strdup(params->tls_hostname->u.s); ++ dest->tls_hostname = params->tls_hostname->u.s; + } + + if (params->has_max_bandwidth) { +-- +2.27.0 + diff --git a/migration-fix-multifd_send_pages-next-channel.patch b/migration-fix-multifd_send_pages-next-channel.patch new file mode 100644 index 
0000000000000000000000000000000000000000..4bb113c644c4175386636e02a5d7188e8c2e408c --- /dev/null +++ b/migration-fix-multifd_send_pages-next-channel.patch @@ -0,0 +1,50 @@ +From c11a23b92334ae86eddfdc2b155d404293891985 Mon Sep 17 00:00:00 2001 +From: alexchen +Date: Tue, 8 Sep 2020 11:18:50 +0000 +Subject: [PATCH 08/11] migration: fix multifd_send_pages() next channel + +multifd_send_pages() loops around the available channels, +the next channel to use between two calls to multifd_send_pages() is stored +inside a local static variable, next_channel. + +It works well, except if the number of channels decreases between two calls +to multifd_send_pages(). In this case, the loop can try to access the +data of a channel that doesn't exist anymore. + +The problem can be triggered if we start a migration with a given number of +channels and then we cancel the migration to restart it with a lower number. +This ends generally with an error like: +qemu-system-ppc64: .../util/qemu-thread-posix.c:77: qemu_mutex_lock_impl: Assertion `mutex->initialized' failed. + +This patch fixes the error by capping next_channel with the current number +of channels before using it. + +Signed-off-by: Laurent Vivier +Message-Id: <20200617113154.593233-1-lvivier@redhat.com> +Reviewed-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: BiaoXiang Ye +--- + migration/ram.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 83cabec6..ac033f22 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -931,6 +931,12 @@ static int multifd_send_pages(RAMState *rs) + uint64_t transferred; + + qemu_sem_wait(&multifd_send_state->channels_ready); ++ /* ++ * next_channel can remain from a previous migration that was ++ * using more channels, so ensure it doesn't overflow if the ++ * limit is lower now. 
++ */ ++ next_channel %= migrate_multifd_channels(); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { + p = &multifd_send_state->params[i]; + +-- +2.27.0.dirty + diff --git a/migration-multifd-clean-pages-after-filling-packet.patch b/migration-multifd-clean-pages-after-filling-packet.patch new file mode 100644 index 0000000000000000000000000000000000000000..596c5244691dc0a60a486598a74e23466a62645b --- /dev/null +++ b/migration-multifd-clean-pages-after-filling-packet.patch @@ -0,0 +1,51 @@ +From 0f7e704a4faa661583ea6d82659f206e561f23d4 Mon Sep 17 00:00:00 2001 +From: Wei Yang +Date: Sat, 26 Oct 2019 07:19:59 +0800 +Subject: [PATCH 3/8] migration/multifd: clean pages after filling packet + +This is a preparation for the next patch: + + not use multifd during postcopy. + +Without enabling postcopy, everything looks good. While after enabling +postcopy, migration may fail even not use multifd during postcopy. The +reason is the pages is not properly cleared and *old* target page will +continue to be transferred. + +After clean pages, migration succeeds. 
+ +Signed-off-by: Wei Yang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + migration/ram.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 840e354..c2eb1ed 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -947,10 +947,10 @@ static int multifd_send_pages(RAMState *rs) + } + qemu_mutex_unlock(&p->mutex); + } +- p->pages->used = 0; ++ assert(!p->pages->used); ++ assert(!p->pages->block); + + p->packet_num = multifd_send_state->packet_num++; +- p->pages->block = NULL; + multifd_send_state->pages = p->pages; + p->pages = pages; + transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len; +@@ -1137,6 +1137,7 @@ static void *multifd_send_thread(void *opaque) + p->num_packets++; + p->num_pages += used; + p->pages->used = 0; ++ p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + + trace_multifd_send(p->id, packet_num, used, flags, +-- +1.8.3.1 + diff --git a/migration-multifd-fix-destroyed-mutex-access-in-term.patch b/migration-multifd-fix-destroyed-mutex-access-in-term.patch new file mode 100644 index 0000000000000000000000000000000000000000..a927ea533c253ff242c5867cde1055453668c1c5 --- /dev/null +++ b/migration-multifd-fix-destroyed-mutex-access-in-term.patch @@ -0,0 +1,64 @@ +From 34d797aa134a33c1d67ca85d9d9f996d58162276 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Wed, 23 Oct 2019 11:47:37 +0800 +Subject: [PATCH 09/10] migration/multifd: fix destroyed mutex access in + terminating multifd threads + +One multifd will lock all the other multifds' IOChannel mutex to inform them +to quit by setting p->quit or shutting down p->c. In this senario, if some +multifds had already been terminated and multifd_load_cleanup/multifd_save_cleanup +had destroyed their mutex, it could cause destroyed mutex access when trying +lock their mutex. 
+ +Here is the coredump stack: + #0 0x00007f81a2794437 in raise () from /usr/lib64/libc.so.6 + #1 0x00007f81a2795b28 in abort () from /usr/lib64/libc.so.6 + #2 0x00007f81a278d1b6 in __assert_fail_base () from /usr/lib64/libc.so.6 + #3 0x00007f81a278d262 in __assert_fail () from /usr/lib64/libc.so.6 + #4 0x000055eb1bfadbd3 in qemu_mutex_lock_impl (mutex=0x55eb1e2d1988, file=, line=) at util/qemu-thread-posix.c:64 + #5 0x000055eb1bb4564a in multifd_send_terminate_threads (err=) at migration/ram.c:1015 + #6 0x000055eb1bb4bb7f in multifd_send_thread (opaque=0x55eb1e2d19f8) at migration/ram.c:1171 + #7 0x000055eb1bfad628 in qemu_thread_start (args=0x55eb1e170450) at util/qemu-thread-posix.c:502 + #8 0x00007f81a2b36df5 in start_thread () from /usr/lib64/libpthread.so.0 + #9 0x00007f81a286048d in clone () from /usr/lib64/libc.so.6 + +To fix it up, let's destroy the mutex after all the other multifd threads had +been terminated. + +Change-Id: I4124d43e8558ba302052bdc53fdae7cfcf9d8687 +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + migration/ram.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 029f1cdf..d7d2d5ec 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1033,6 +1033,10 @@ void multifd_save_cleanup(void) + if (p->running) { + qemu_thread_join(&p->thread); + } ++ } ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDSendParams *p = &multifd_send_state->params[i]; ++ + socket_send_channel_destroy(p->c); + p->c = NULL; + qemu_mutex_destroy(&p->mutex); +@@ -1306,6 +1310,10 @@ int multifd_load_cleanup(Error **errp) + qemu_sem_post(&p->sem_sync); + qemu_thread_join(&p->thread); + } ++ } ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDRecvParams *p = &multifd_recv_state->params[i]; ++ + object_unref(OBJECT(p->c)); + p->c = NULL; + qemu_mutex_destroy(&p->mutex); +-- +2.19.1 diff --git 
a/migration-multifd-fix-hangup-with-TLS-Multifd-due-to.patch b/migration-multifd-fix-hangup-with-TLS-Multifd-due-to.patch new file mode 100644 index 0000000000000000000000000000000000000000..021fbcf8a6e4053b5e051ce3885b677d678259ca --- /dev/null +++ b/migration-multifd-fix-hangup-with-TLS-Multifd-due-to.patch @@ -0,0 +1,83 @@ +From 26ffadd08711aa4ef62932ac0ecf5048518b2801 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 14:50:12 +0800 +Subject: [PATCH] migration/multifd: fix hangup with TLS-Multifd due to + blocking handshake +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The qemu main loop could hang up forever when we enable TLS+Multifd. +The Src multifd_send_0 invokes tls handshake, it sends hello to sever +and wait response. +However, the Dst main qemu loop has been waiting recvmsg() for multifd_recv_1. +Both of Src and Dst main qemu loop are blocking and waiting for reponse which +results in hanging up forever. + +Src: (multifd_send_0) Dst: (multifd_recv_1) +multifd_channel_connect migration_channel_process_incoming + multifd_tls_channel_connect migration_tls_channel_process_incoming + multifd_tls_channel_connect qio_channel_tls_handshake_task + qio_channel_tls_handshake gnutls_handshake + qio_channel_tls_handshake_task ... + qcrypto_tls_session_handshake ... + gnutls_handshake ... + ... ... + recvmsg (Blocking I/O waiting for response) recvmsg (Blocking I/O waiting for response) + +Fix this by offloadinig handshake work to a background thread. + +Reported-by: Yan Jin +Suggested-by: Daniel P. Berrangé +Signed-off-by: Chuan Zheng +Message-Id: <1604643893-8223-1-git-send-email-zhengchuan@huawei.com> +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/ram.c | 23 +++++++++++++++++------ + 1 file changed, 17 insertions(+), 6 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index dc9831d7f3..a37dbfc049 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1220,6 +1220,19 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + multifd_channel_connect(p, ioc, err); + } + ++static void *multifd_tls_handshake_thread(void *opaque) ++{ ++ MultiFDSendParams *p = opaque; ++ QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c); ++ ++ qio_channel_tls_handshake(tioc, ++ multifd_tls_outgoing_handshake, ++ p, ++ NULL, ++ NULL); ++ return NULL; ++} ++ + static void multifd_tls_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error **errp) +@@ -1235,12 +1248,10 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, + + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); +- qio_channel_tls_handshake(tioc, +- multifd_tls_outgoing_handshake, +- p, +- NULL, +- NULL); +- ++ p->c = QIO_CHANNEL(tioc); ++ qemu_thread_create(&p->thread, "multifd-tls-handshake-worker", ++ multifd_tls_handshake_thread, p, ++ QEMU_THREAD_JOINABLE); + } + + static bool multifd_channel_connect(MultiFDSendParams *p, +-- +2.27.0 + diff --git a/migration-multifd-fix-nullptr-access-in-multifd_send.patch b/migration-multifd-fix-nullptr-access-in-multifd_send.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2d278a135434e3b0838be7876b1fe2a616816cd --- /dev/null +++ b/migration-multifd-fix-nullptr-access-in-multifd_send.patch @@ -0,0 +1,62 @@ +From 6a08ee257a95d9f2514bd995e90ddf46d3f78b41 Mon Sep 17 00:00:00 2001 +From: Zheng Chuan +Date: Tue, 21 Apr 2020 19:49:26 +0800 +Subject: [PATCH 10/10] migration/multifd: fix nullptr access in + multifd_send_terminate_threads + +If the multifd_send_threads is not created when migration is failed, +multifd_save_cleanup would be called twice. 
In this senario, the +multifd_send_state is accessed after it has been released, the result +is that the source VM is crashing down. + +Here is the coredump stack: + Program received signal SIGSEGV, Segmentation fault. + 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 + 1012 MultiFDSendParams *p = &multifd_send_state->params[i]; + #0 0x00005629333a78ef in multifd_send_terminate_threads (err=err@entry=0x0) at migration/ram.c:1012 + #1 0x00005629333ab8a9 in multifd_save_cleanup () at migration/ram.c:1028 + #2 0x00005629333abaea in multifd_new_send_channel_async (task=0x562935450e70, opaque=) at migration/ram.c:1202 + #3 0x000056293373a562 in qio_task_complete (task=task@entry=0x562935450e70) at io/task.c:196 + #4 0x000056293373a6e0 in qio_task_thread_result (opaque=0x562935450e70) at io/task.c:111 + #5 0x00007f475d4d75a7 in g_idle_dispatch () from /usr/lib64/libglib-2.0.so.0 + #6 0x00007f475d4da9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 + #7 0x0000562933785b33 in glib_pollfds_poll () at util/main-loop.c:219 + #8 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #9 main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:518 + #10 0x00005629334c5acf in main_loop () at vl.c:1810 + #11 0x000056293334d7bb in main (argc=, argv=, envp=) at vl.c:4471 + +If the multifd_send_threads is not created when migration is failed. +In this senario, we don't call multifd_save_cleanup in multifd_new_send_channel_async. 
+ +Change-Id: I7441efe2ed542054ecd2a4da8146e2652824b452 +Signed-off-by: Zhimin Feng +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + migration/ram.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index d7d2d5ec..1858d66c 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1205,7 +1205,15 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + + if (qio_task_propagate_error(task, &local_err)) { + migrate_set_error(migrate_get_current(), local_err); +- multifd_save_cleanup(); ++ /* Error happen, we need to tell who pay attention to me */ ++ qemu_sem_post(&multifd_send_state->channels_ready); ++ qemu_sem_post(&p->sem_sync); ++ /* ++ * Although multifd_send_thread is not created, but main migration ++ * thread neet to judge whether it is running, so we need to mark ++ * its status. ++ */ ++ p->quit = true; + } else { + p->c = QIO_CHANNEL(sioc); + qio_channel_set_delay(p->c, false); +-- +2.19.1 diff --git a/migration-multifd-fix-nullptr-access-in-terminating-m.patch b/migration-multifd-fix-nullptr-access-in-terminating-m.patch new file mode 100644 index 0000000000000000000000000000000000000000..d403b28f28a708a94d7799618053e53c7d75b939 --- /dev/null +++ b/migration-multifd-fix-nullptr-access-in-terminating-m.patch @@ -0,0 +1,75 @@ +From d9a847f0982fcca6f63031215065c346fcc27bbc Mon Sep 17 00:00:00 2001 +From: Zheng Chuan +Date: Fri, 24 Apr 2020 11:58:33 +0800 +Subject: [PATCH 06/10] migration/multifd: fix nullptr access in terminating + multifd threads + +One multifd channel will shutdown all the other multifd's IOChannel when it +fails to receive an IOChannel. In this senario, if some multifds had not +received its IOChannel yet, it would try to shutdown its IOChannel which could +cause nullptr access at qio_channel_shutdown. 
+ +Here is the coredump stack: + #0 object_get_class (obj=obj@entry=0x0) at qom/object.c:908 + #1 0x00005563fdbb8f4a in qio_channel_shutdown (ioc=0x0, how=QIO_CHANNEL_SHUTDOWN_BOTH, errp=0x0) at io/channel.c:355 + #2 0x00005563fd7b4c5f in multifd_recv_terminate_threads (err=) at migration/ram.c:1280 + #3 0x00005563fd7bc019 in multifd_recv_new_channel (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce00) at migration/ram.c:1478 + #4 0x00005563fda82177 in migration_ioc_process_incoming (ioc=ioc@entry=0x556400255610, errp=errp@entry=0x7ffec07dce30) at migration/migration.c:605 + #5 0x00005563fda8567d in migration_channel_process_incoming (ioc=0x556400255610) at migration/channel.c:44 + #6 0x00005563fda83ee0 in socket_accept_incoming_migration (listener=0x5563fff6b920, cioc=0x556400255610, opaque=) at migration/socket +.c:166 + #7 0x00005563fdbc25cd in qio_net_listener_channel_func (ioc=, condition=, opaque=) at io/net-listener.c:54 + #8 0x00007f895b6fe9a9 in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 + #9 0x00005563fdc18136 in glib_pollfds_poll () at util/main-loop.c:218 + #10 0x00005563fdc181b5 in os_host_main_loop_wait (timeout=1000000000) at util/main-loop.c:241 + #11 0x00005563fdc183a2 in main_loop_wait (nonblocking=nonblocking@entry=0) at util/main-loop.c:517 + #12 0x00005563fd8edb37 in main_loop () at vl.c:1791 + #13 0x00005563fd74fd45 in main (argc=, argv=, envp=) at vl.c:4473 + +To fix it up, let's check p->c before calling qio_channel_shutdown. 
+ +Change-Id: Ib36c1b3d866a3ad92d1460512df840cfb8736ab6 +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + migration/ram.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 51811c2d..756a525f 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1112,6 +1112,7 @@ static void *multifd_send_thread(void *opaque) + rcu_register_thread(); + + if (multifd_send_initial_packet(p, &local_err) < 0) { ++ ret = -1; + goto out; + } + /* initial packet */ +@@ -1178,9 +1179,7 @@ out: + * who pay attention to me. + */ + if (ret != 0) { +- if (flags & MULTIFD_FLAG_SYNC) { +- qemu_sem_post(&p->sem_sync); +- } ++ qemu_sem_post(&p->sem_sync); + qemu_sem_post(&multifd_send_state->channels_ready); + } + +@@ -1279,7 +1278,9 @@ static void multifd_recv_terminate_threads(Error *err) + - normal quit, i.e. everything went fine, just finished + - error quit: We close the channels so the channel threads + finish the qio_channel_read_all_eof() */ +- qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++ if (p->c) { ++ qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++ } + qemu_mutex_unlock(&p->mutex); + } + } +-- +2.19.1 diff --git a/migration-multifd-fix-potential-wrong-acception-orde.patch b/migration-multifd-fix-potential-wrong-acception-orde.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b8f18ce71abccf8987fb9261817654b3b10d631 --- /dev/null +++ b/migration-multifd-fix-potential-wrong-acception-orde.patch @@ -0,0 +1,302 @@ +From 71f3e496c128b46f803cc4776154b02a5e505cb2 Mon Sep 17 00:00:00 2001 +From: Zheng Chuan +Date: Wed, 22 Apr 2020 13:45:39 +0800 +Subject: [PATCH] migration/multifd: fix potential wrong acception order of + IOChannel + +Multifd assumes the migration thread IOChannel is always established before +the multifd IOChannels, but this assumption will be broken in many situations +like 
network packet loss. + +For example: +Step1: Source (migration thread IOChannel) --SYN--> Destination +Step2: Source (migration thread IOChannel) <--SYNACK Destination +Step3: Source (migration thread IOChannel, lost) --ACK-->X Destination +Step4: Source (multifd IOChannel) --SYN--> Destination +Step5: Source (multifd IOChannel) <--SYNACK Destination +Step6: Source (multifd IOChannel, ESTABLISHED) --ACK--> Destination +Step7: Destination accepts multifd IOChannel +Step8: Source (migration thread IOChannel, ESTABLISHED) -ACK,DATA-> Destination +Step9: Destination accepts migration thread IOChannel + +The above situation can be reproduced by creating a weak network environment, +such as "tc qdisc add dev eth0 root netem loss 50%". The wrong acception order +will cause magic check failure and thus lead to migration failure. + +This patch fixes this issue by sending a migration IOChannel initial packet with +a unique id when using multifd migration. Since the multifd IOChannels will also +send initial packets, the destination can judge whether the processing IOChannel +belongs to multifd by checking the id in the initial packet. This mechanism can +ensure that different IOChannels will go to correct branches in our test. 
+ +Change-Id: I63d1c32c7b66063bd6a3c5e7d63500555bd148b9 +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang + +diff --git a/migration/channel.c b/migration/channel.c +index 20e4c8e2..74621814 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -82,6 +82,15 @@ void migration_channel_connect(MigrationState *s, + return; + } + } else { ++ if (migrate_use_multifd()) { ++ /* multifd migration cannot distinguish migration IOChannel ++ * from multifd IOChannels, so we need to send an initial packet ++ * to show it is migration IOChannel ++ */ ++ migration_send_initial_packet(ioc, ++ migrate_multifd_channels(), ++ &error); ++ } + QEMUFile *f = qemu_fopen_channel_output(ioc); + + qemu_mutex_lock(&s->qemu_file_lock); +diff --git a/migration/migration.c b/migration/migration.c +index 114c33a1..8f2fc2b4 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -517,12 +517,6 @@ static void migration_incoming_setup(QEMUFile *f) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + +- if (multifd_load_setup() != 0) { +- /* We haven't been able to create multifd threads +- nothing better to do */ +- exit(EXIT_FAILURE); +- } +- + if (!mis->from_src_file) { + mis->from_src_file = f; + } +@@ -580,36 +574,41 @@ void migration_fd_process_incoming(QEMUFile *f) + void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); +- bool start_migration; +- +- if (!mis->from_src_file) { +- /* The first connection (multifd may have multiple) */ +- QEMUFile *f = qemu_fopen_channel_input(ioc); ++ Error *local_err = NULL; ++ int id = 0; + +- /* If it's a recovery, we're done */ +- if (postcopy_try_recover(f)) { +- return; +- } ++ if (migrate_use_multifd()) { ++ id = migration_recv_initial_packet(ioc, &local_err); ++ } ++ if (!migrate_use_multifd() || id == migrate_multifd_channels()) { ++ if (!mis->from_src_file) { ++ /* The migration connection (multifd may have multiple) */ ++ 
QEMUFile *f = qemu_fopen_channel_input(ioc); + +- migration_incoming_setup(f); ++ /* If it's a recovery, we're done */ ++ if (postcopy_try_recover(f)) { ++ return; ++ } + +- /* +- * Common migration only needs one channel, so we can start +- * right now. Multifd needs more than one channel, we wait. +- */ +- start_migration = !migrate_use_multifd(); +- } else { +- Error *local_err = NULL; ++ migration_incoming_setup(f); ++ } ++ } else if (id >= 0) { + /* Multiple connections */ + assert(migrate_use_multifd()); +- start_migration = multifd_recv_new_channel(ioc, &local_err); ++ multifd_recv_new_channel(ioc, id, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } ++ } else { ++ /* Bad connections */ ++ multifd_recv_terminate_threads(local_err); ++ error_propagate(errp, local_err); ++ return; + } + +- if (start_migration) { ++ /* Once we have all the channels we need, we can start migration */ ++ if (migration_has_all_channels()) { + migration_incoming_process(); + } + } +diff --git a/migration/migration.h b/migration/migration.h +index 1fdd7b21..feb34430 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -339,4 +339,7 @@ int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); + void migration_make_urgent_request(void); + void migration_consume_urgent_request(void); + ++int migration_send_initial_packet(QIOChannel *c, uint8_t id, Error **errp); ++int migration_recv_initial_packet(QIOChannel *c, Error **errp); ++ + #endif +diff --git a/migration/ram.c b/migration/ram.c +index 756a525f..029f1cdf 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -593,7 +593,7 @@ typedef struct { + uint8_t id; + uint8_t unused1[7]; /* Reserved for future use */ + uint64_t unused2[4]; /* Reserved for future use */ +-} __attribute__((packed)) MultiFDInit_t; ++} __attribute__((packed)) MigrationInit_t; + + typedef struct { + uint32_t magic; +@@ -702,26 +702,26 @@ typedef struct { + QemuSemaphore sem_sync; + } 
MultiFDRecvParams; + +-static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) ++int migration_send_initial_packet(QIOChannel *c, uint8_t id, Error **errp) + { +- MultiFDInit_t msg; ++ MigrationInit_t msg; + int ret; + + msg.magic = cpu_to_be32(MULTIFD_MAGIC); + msg.version = cpu_to_be32(MULTIFD_VERSION); +- msg.id = p->id; ++ msg.id = id; + memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid)); + +- ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp); ++ ret = qio_channel_write_all(c, (char *)&msg, sizeof(msg), errp); + if (ret != 0) { + return -1; + } + return 0; + } + +-static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) ++int migration_recv_initial_packet(QIOChannel *c, Error **errp) + { +- MultiFDInit_t msg; ++ MigrationInit_t msg; + int ret; + + ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp); +@@ -756,8 +756,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.id > migrate_multifd_channels()) { +- error_setg(errp, "multifd: received channel version %d " +- "expected %d", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received channel id %d " ++ "expected [0-%d]", msg.id, migrate_multifd_channels()); + return -1; + } + +@@ -1111,7 +1111,7 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); + +- if (multifd_send_initial_packet(p, &local_err) < 0) { ++ if (migration_send_initial_packet(p->c, p->id, &local_err) < 0) { + ret = -1; + goto out; + } +@@ -1255,7 +1255,7 @@ struct { + uint64_t packet_num; + } *multifd_recv_state; + +-static void multifd_recv_terminate_threads(Error *err) ++void multifd_recv_terminate_threads(Error *err) + { + int i; + +@@ -1470,21 +1470,10 @@ bool multifd_recv_all_channels_created(void) + * - Return false and do not set @errp when correctly receiving the current one; + * - Return false and set @errp when failing to receive the current channel. 
+ */ +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) ++void multifd_recv_new_channel(QIOChannel *ioc, int id, Error **errp) + { + MultiFDRecvParams *p; + Error *local_err = NULL; +- int id; +- +- id = multifd_recv_initial_packet(ioc, &local_err); +- if (id < 0) { +- multifd_recv_terminate_threads(local_err); +- error_propagate_prepend(errp, local_err, +- "failed to receive packet" +- " via multifd channel %d: ", +- atomic_read(&multifd_recv_state->count)); +- return false; +- } + + p = &multifd_recv_state->params[id]; + if (p->c != NULL) { +@@ -1492,7 +1481,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + id); + multifd_recv_terminate_threads(local_err); + error_propagate(errp, local_err); +- return false; ++ return; + } + p->c = ioc; + object_ref(OBJECT(ioc)); +@@ -1503,8 +1492,6 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + atomic_inc(&multifd_recv_state->count); +- return atomic_read(&multifd_recv_state->count) == +- migrate_multifd_channels(); + } + + /** +diff --git a/migration/ram.h b/migration/ram.h +index bd0eee79..a788ff0e 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -46,7 +46,8 @@ void multifd_save_cleanup(void); + int multifd_load_setup(void); + int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); ++void multifd_recv_new_channel(QIOChannel *ioc, int id, Error **errp); ++void multifd_recv_terminate_threads(Error *err); + + uint64_t ram_pagesize_summary(void); + int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); +diff --git a/migration/socket.c b/migration/socket.c +index 98efdc02..093b956b 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -22,6 +22,7 @@ + #include "channel.h" + #include "socket.h" + #include "migration.h" ++#include "ram.h" + #include 
"qemu-file.h" + #include "io/channel-socket.h" + #include "io/net-listener.h" +@@ -181,6 +182,12 @@ static void socket_start_incoming_migration(SocketAddress *saddr, + + qio_net_listener_set_name(listener, "migration-socket-listener"); + ++ if (multifd_load_setup() != 0) { ++ /* We haven't been able to create multifd threads ++ nothing better to do */ ++ exit(EXIT_FAILURE); ++ } ++ + if (qio_net_listener_open_sync(listener, saddr, errp) < 0) { + object_unref(OBJECT(listener)); + return; +-- +2.23.0 diff --git a/migration-multifd-not-use-multifd-during-postcopy.patch b/migration-multifd-not-use-multifd-during-postcopy.patch new file mode 100644 index 0000000000000000000000000000000000000000..6df61bfdd8d637854acea0e13e787db04dbdeca2 --- /dev/null +++ b/migration-multifd-not-use-multifd-during-postcopy.patch @@ -0,0 +1,41 @@ +From 7331554bd6ab230404b20d612aed20a95c20eba6 Mon Sep 17 00:00:00 2001 +From: Wei Yang +Date: Sat, 26 Oct 2019 07:20:00 +0800 +Subject: [PATCH 4/8] migration/multifd: not use multifd during postcopy + +We don't support multifd during postcopy, but user still could enable +both multifd and postcopy. This leads to migration failure. + +Skip multifd during postcopy. + +Signed-off-by: Wei Yang +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +--- + migration/ram.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index c2eb1ed..aace3a5 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2571,10 +2571,13 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, + } + + /* +- * do not use multifd for compression as the first page in the new +- * block should be posted out before sending the compressed page ++ * Do not use multifd for: ++ * 1. Compression as the first page in the new block should be posted out ++ * before sending the compressed page ++ * 2. 
In postcopy as one whole host page should be placed + */ +- if (!save_page_use_compression(rs) && migrate_use_multifd()) { ++ if (!save_page_use_compression(rs) && migrate_use_multifd() ++ && !migration_in_postcopy()) { + return ram_save_multifd_page(rs, block, offset); + } + +-- +1.8.3.1 + diff --git a/migration-ram-Do-error_free-after-migrate_set_error-.patch b/migration-ram-Do-error_free-after-migrate_set_error-.patch new file mode 100644 index 0000000000000000000000000000000000000000..0039f43d86d5506bfca2953904a215d3f178526a --- /dev/null +++ b/migration-ram-Do-error_free-after-migrate_set_error-.patch @@ -0,0 +1,69 @@ +From 05d1fbd2390d441e5acb606dba3d308d506a8eb1 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Tue, 5 May 2020 11:44:20 +0800 +Subject: [PATCH 1/3] migration/ram: Do error_free after migrate_set_error to + avoid memleaks + +If local_err is not NULL, it use error_copy to set migrate error in +multifd_send_terminate_threads. Thus, we should free it. + +Similarly, fix another leak in multifd_recv_thread. 
+ +The leak stack: +Direct leak of 96 byte(s) in 2 object(s) allocated from: + #0 0xfffdd97fe938 in __interceptor_calloc (/lib64/libasan.so.4+0xee938) + #1 0xfffdd85a8bb0 in g_malloc0 (/lib64/libglib-2.0.so.0+0x58bb0) + #2 0xaaadfc6e41c4 in error_setv util/error.c:61 + #3 0xaaadfc6e4880 in error_setg_errno_internal util/error.c:109 + #4 0xaaadfc6192a8 in qio_channel_socket_writev io/channel-socket.c:552 + #5 0xaaadfc614604 in qio_channel_writev_all io/channel.c:171 + #6 0xaaadfc6147ec in qio_channel_write_all io/channel.c:257 + #7 0xaaadfbaec5fc in multifd_send_thread /usr/src/debug/qemu-4.1.0-4_asan.aarch64/migration/ram.c:1145 + #8 0xaaadfc6db768 in qemu_thread_start util/qemu-thread-posix.c:502 + #9 0xfffdd79a88c8 (/lib64/libpthread.so.0+0x88c8) + #10 0xfffdd78e9578 (/lib64/libc.so.6+0xd9578) + +Indirect leak of 104 byte(s) in 2 object(s) allocated from: + #0 0xfffdd97feb40 in realloc (/lib64/libasan.so.4+0xeeb40) + #1 0xfffdd78fa6e0 in __vasprintf_chk (/lib64/libc.so.6+0xea6e0) + #2 0xfffdd85ee710 in g_vasprintf (/lib64/libglib-2.0.so.0+0x9e710) + #3 0xfffdd85c45c4 in g_strdup_vprintf (/lib64/libglib-2.0.so.0+0x745c4) + #4 0xfffdd85c4674 in g_strdup_printf (/lib64/libglib-2.0.so.0+0x74674) + #5 0xaaadfc6e4214 in error_setv util/error.c:65 + #6 0xaaadfc6e4880 in error_setg_errno_internal util/error.c:109 + #7 0xaaadfc6192a8 in qio_channel_socket_writev io/channel-socket.c:552 + #8 0xaaadfc614604 in qio_channel_writev_all io/channel.c:171 + #9 0xaaadfc6147ec in qio_channel_write_all io/channel.c:257 + #10 0xaaadfbaec5fc in multifd_send_thread /usr/src/debug/qemu-4.1.0-4_asan.aarch64/migration/ram.c:1145 + #11 0xaaadfc6db768 in qemu_thread_start util/qemu-thread-posix.c:502 + #12 0xfffdd79a88c8 (/lib64/libpthread.so.0+0x88c8) + #13 0xfffdd78e9578 (/lib64/libc.so.6+0xd9578) + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +--- + migration/ram.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 
1858d66c..6baf1412 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1176,6 +1176,7 @@ static void *multifd_send_thread(void *opaque) + out: + if (local_err) { + multifd_send_terminate_threads(local_err); ++ error_free(local_err); + } + + /* +@@ -1427,6 +1428,7 @@ static void *multifd_recv_thread(void *opaque) + + if (local_err) { + multifd_recv_terminate_threads(local_err); ++ error_free(local_err); + } + qemu_mutex_lock(&p->mutex); + p->running = false; +-- +2.23.0 diff --git a/migration-ram-Optimize-ram_save_host_page.patch b/migration-ram-Optimize-ram_save_host_page.patch new file mode 100644 index 0000000000000000000000000000000000000000..c58a6dcb6a5f3dc85be056f1c6ffd3a0bf3ba972 --- /dev/null +++ b/migration-ram-Optimize-ram_save_host_page.patch @@ -0,0 +1,95 @@ +From ae1a8506aa45266f2bf77a8d428f5ccd970a9b13 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Tue, 16 Mar 2021 20:57:16 +0800 +Subject: [PATCH] migration/ram: Optimize ram_save_host_page() + +Starting from pss->page, ram_save_host_page() will check every page +and send the dirty pages up to the end of the current host page or +the boundary of used_length of the block. If the host page size is +a huge page, the step "check" will take a lot of time. + +It will improve performance to use migration_bitmap_find_dirty(). + +Tested on Kunpeng 920; VM parameters: 1U 4G (page size 1G) +The time of ram_save_host_page() in the last round of ram saving: +before optimize: 9250us after optimize: 34us + +Signed-off-by: Keqian Zhu +Signed-off-by: Kunkun Jiang +Reviewed-by: Peter Xu +Message-Id: <20210316125716.1243-3-jiangkunkun@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/ram.c | 43 +++++++++++++++++++++---------------------- + 1 file changed, 21 insertions(+), 22 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 22063e00b4..1bd99ff9e5 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3052,6 +3052,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + int tmppages, pages = 0; + size_t pagesize_bits = + qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS; ++ unsigned long hostpage_boundary = ++ QEMU_ALIGN_UP(pss->page + 1, pagesize_bits); + + if (ramblock_is_ignored(pss->block)) { + error_report("block %s should not be migrated !", pss->block->idstr); +@@ -3060,34 +3062,31 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + + do { + /* Check the pages is dirty and if it is send it */ +- if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { +- pss->page++; +- continue; +- } +- +- tmppages = ram_save_target_page(rs, pss, last_stage); +- if (tmppages < 0) { +- return tmppages; +- } ++ if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { ++ tmppages = ram_save_target_page(rs, pss, last_stage); ++ if (tmppages < 0) { ++ return tmppages; ++ } + +- pages += tmppages; +- if (pss->block->unsentmap) { +- clear_bit(pss->page, pss->block->unsentmap); +- } ++ pages += tmppages; ++ if (pss->block->unsentmap) { ++ clear_bit(pss->page, pss->block->unsentmap); ++ } + +- pss->page++; +- /* +- * Allow rate limiting to happen in the middle of huge pages if +- * something is sent in the current iteration. +- */ +- if (pagesize_bits > 1 && tmppages > 0) { +- migration_rate_limit(); ++ /* ++ * Allow rate limiting to happen in the middle of huge pages if ++ * something is sent in the current iteration. 
++ */ ++ if (pagesize_bits > 1 && tmppages > 0) { ++ migration_rate_limit(); ++ } + } ++ pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); + } while ((pss->page & (pagesize_bits - 1)) && + offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); + +- /* The offset we leave with is the last one we looked at */ +- pss->page--; ++ /* The offset we leave with is the min boundary of host page and block */ ++ pss->page = MIN(pss->page, hostpage_boundary) - 1; + return pages; + } + +-- +2.27.0 + diff --git a/migration-ram-Reduce-unnecessary-rate-limiting.patch b/migration-ram-Reduce-unnecessary-rate-limiting.patch new file mode 100644 index 0000000000000000000000000000000000000000..64374dd3e255224e650c8de3e93669db04a6c413 --- /dev/null +++ b/migration-ram-Reduce-unnecessary-rate-limiting.patch @@ -0,0 +1,42 @@ +From 338d691c985ad5b3624ef36e4beaac82982c8f0a Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Tue, 16 Mar 2021 20:57:15 +0800 +Subject: [PATCH] migration/ram: Reduce unnecessary rate limiting + +When the host page is a huge page and something is sent in the +current iteration, migration_rate_limit() should be executed. +If not, it can be omitted. + +Signed-off-by: Keqian Zhu +Signed-off-by: Kunkun Jiang +Reviewed-by: David Edmondson +Reviewed-by: Dr. David Alan Gilbert +Message-Id: <20210316125716.1243-2-jiangkunkun@huawei.com> +Signed-off-by: Dr. David Alan Gilbert +--- + migration/ram.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 2077ba5be4..22063e00b4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3076,8 +3076,13 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, + } + + pss->page++; +- /* Allow rate limiting to happen in the middle of huge pages */ +- migration_rate_limit(); ++ /* ++ * Allow rate limiting to happen in the middle of huge pages if ++ * something is sent in the current iteration. 
++ */ ++ if (pagesize_bits > 1 && tmppages > 0) { ++ migration_rate_limit(); ++ } + } while ((pss->page & (pagesize_bits - 1)) && + offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS)); + +-- +2.27.0 + diff --git a/migration-ram-fix-memleaks-in-multifd_new_send_chann.patch b/migration-ram-fix-memleaks-in-multifd_new_send_chann.patch new file mode 100644 index 0000000000000000000000000000000000000000..f9cb2bf652b90968144c673fd6c59655acfd785f --- /dev/null +++ b/migration-ram-fix-memleaks-in-multifd_new_send_chann.patch @@ -0,0 +1,54 @@ +From 4d456b243a41a8e91535b2820fd6ed4f6fb4a194 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Tue, 5 May 2020 15:50:54 +0800 +Subject: [PATCH 2/3] migration/ram: fix memleaks in + multifd_new_send_channel_async + +When error happen in multifd_new_send_channel_async, 'sioc' will not be used +to create the multifd_send_thread. Let's free it to avoid a memleak. And also +do error_free after migrate_set_error() to avoid another leak in the same place. 
+ +The leak stack: +Direct leak of 2160 byte(s) in 6 object(s) allocated from: + #0 0xfffdd97fe754 in malloc (/lib64/libasan.so.4+0xee754) + #1 0xfffdd85a8b48 in g_malloc (/lib64/libglib-2.0.so.0+0x58b48) + #2 0xaaadfc4e2b10 in object_new_with_type qom/object.c:634 + #3 0xaaadfc619468 in qio_channel_socket_new io/channel-socket.c:56 + #4 0xaaadfc3d3e74 in socket_send_channel_create migration/socket.c:37 + #5 0xaaadfbaed6f4 in multifd_save_setup /usr/src/debug/qemu-4.1.0-4_asan.aarch64/migration/ram.c:1255 + #6 0xaaadfc3d2f78 in migrate_fd_connect migration/migration.c:3359 + #7 0xaaadfc3d6240 in migration_channel_connect migration/channel.c:101 + #8 0xaaadfc3d3590 in socket_outgoing_migration migration/socket.c:108 + #9 0xaaadfc625a64 in qio_task_complete io/task.c:195 + #10 0xaaadfc625ed0 in qio_task_thread_result io/task.c:111 + #11 0xfffdd859edec (/lib64/libglib-2.0.so.0+0x4edec) + #12 0xfffdd85a2a78 in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x52a78) + #13 0xaaadfc6d3b84 in glib_pollfds_poll util/main-loop.c:218 + #14 0xaaadfc6d3b84 in os_host_main_loop_wait util/main-loop.c:241 + #15 0xaaadfc6d3b84 in main_loop_wait util/main-loop.c:517 + #16 0xaaadfbf9206c in main_loop /usr/src/debug/qemu-4.1.0-4_asan.aarch64/vl.c:1791 + #17 0xaaadfba1b124 in main /usr/src/debug/qemu-4.1.0-4_asan.aarch64/vl.c:4473 + #18 0xfffdd7833f5c in __libc_start_main (/lib64/libc.so.6+0x23f5c) + #19 0xaaadfba26360 (/usr/libexec/qemu-kvm+0x886360) + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +--- + migration/ram.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 6baf1412..840e3548 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1215,6 +1215,8 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + * its status. 
+ */ + p->quit = true; ++ object_unref(OBJECT(sioc)); ++ error_free(local_err); + } else { + p->c = QIO_CHANNEL(sioc); + qio_channel_set_delay(p->c, false); +-- +2.23.0 diff --git a/migration-ram-fix-use-after-free-of-local_err.patch b/migration-ram-fix-use-after-free-of-local_err.patch new file mode 100644 index 0000000000000000000000000000000000000000..f74e3b18df98ae0e5a88ff9224fa06c8ea24197a --- /dev/null +++ b/migration-ram-fix-use-after-free-of-local_err.patch @@ -0,0 +1,33 @@ +From 019526f7f7b42a7d1b8a74e1db6a8050adf9e1fb Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 24 Mar 2020 18:36:29 +0300 +Subject: [PATCH 08/14] migration/ram: fix use after free of local_err + +local_err is used again in migration_bitmap_sync_precopy() after +precopy_notify(), so we must zero it. Otherwise try to set +non-NULL local_err will crash. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200324153630.11882-6-vsementsov@virtuozzo.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Peng Liang +--- + migration/ram.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 840e35480b04..5d1ae7570018 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1912,6 +1912,7 @@ static void migration_bitmap_sync_precopy(RAMState *rs) + */ + if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) { + error_report_err(local_err); ++ local_err = NULL; + } + + migration_bitmap_sync(rs); +-- +2.26.2 + diff --git a/migration-rdma-cleanup-rdma-context-before-g_free-to.patch b/migration-rdma-cleanup-rdma-context-before-g_free-to.patch new file mode 100644 index 0000000000000000000000000000000000000000..a39894ada540a713645b0735b719eb4d5a3edbff --- /dev/null +++ b/migration-rdma-cleanup-rdma-context-before-g_free-to.patch @@ -0,0 +1,58 @@ +From 9867dc6fc3f131324b73664b9617376270d8d013 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Fri, 8 May 2020 06:07:55 -0400 +Subject: [PATCH 4/5] migration/rdma: cleanup rdma context before g_free to + avoid memleaks + +When error happen in initializing 'rdma_return_path', we should cleanup rdma context +before g_free(rdma) to avoid some memleaks. This patch fix that. + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Message-Id: <20200508100755.7875-3-pannengyuan@huawei.com> +Reviewed-by: Juan Quintela +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/rdma.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/migration/rdma.c b/migration/rdma.c +index 3036221e..bb24dac5 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -4103,20 +4103,20 @@ void rdma_start_outgoing_migration(void *opaque, + rdma_return_path = qemu_rdma_data_init(host_port, errp); + + if (rdma_return_path == NULL) { +- goto err; ++ goto return_path_err; + } + + ret = qemu_rdma_source_init(rdma_return_path, + s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); + + if (ret) { +- goto err; ++ goto return_path_err; + } + + ret = qemu_rdma_connect(rdma_return_path, errp); + + if (ret) { +- goto err; ++ goto return_path_err; + } + + rdma->return_path = rdma_return_path; +@@ -4129,6 +4129,8 @@ void rdma_start_outgoing_migration(void *opaque, + s->to_dst_file = qemu_fopen_rdma(rdma, "wb"); + migrate_fd_connect(s, NULL); + return; ++return_path_err: ++ qemu_rdma_cleanup(rdma); + err: + g_free(rdma); + g_free(rdma_return_path); +-- +2.23.0 + diff --git a/migration-rdma-fix-a-memleak-on-error-path-in-rdma_s.patch b/migration-rdma-fix-a-memleak-on-error-path-in-rdma_s.patch new file mode 100644 index 0000000000000000000000000000000000000000..5e0fb101d827377551a7858f225cf365367e12b7 --- /dev/null +++ b/migration-rdma-fix-a-memleak-on-error-path-in-rdma_s.patch @@ -0,0 +1,43 @@ +From 8ae2e3b8be812bcbdeb6151c685026bcaedd4a4b Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Sat, 9 May 2020 15:25:42 +0800 +Subject: [PATCH 3/3] migration/rdma: fix a memleak on error path in + rdma_start_incoming_migration + +'rdma->host' is malloced in qemu_rdma_data_init, but forgot to free on the error +path in rdma_start_incoming_migration(), this patch fix that. 
+ +Direct leak of 2 byte(s) in 1 object(s) allocated from: + #0 0xfffce56d34fb in __interceptor_malloc (/lib64/libasan.so.4+0xd34fb) + #1 0xfffce5158aa3 in g_malloc (/lib64/libglib-2.0.so.0+0x58aa3) + #2 0xfffce5174213 in g_strdup (/lib64/libglib-2.0.so.0+0x74213) + #3 0xaaad7c569ddf in qemu_rdma_data_init /Images/qemu/migration/rdma.c:2647 + #4 0xaaad7c57c99f in rdma_start_incoming_migration /Images/qemu/migration/rdma.c:4020 + #5 0xaaad7c52b35f in qemu_start_incoming_migration /Images/qemu/migration/migration.c:371 + #6 0xaaad7be173bf in qemu_init /Images/qemu/softmmu/vl.c:4464 + #7 0xaaad7bb29843 in main /Images/qemu/softmmu/main.c:48 + #8 0xfffce3713f5f in __libc_start_main (/lib64/libc.so.6+0x23f5f) + #9 0xaaad7bb2bf73 (/Images/qemu/build/aarch64-softmmu/qemu-system-aarch64+0x8fbf73) + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +--- + migration/rdma.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/migration/rdma.c b/migration/rdma.c +index 3036221e..b5fdb6a7 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -4068,6 +4068,9 @@ void rdma_start_incoming_migration(const char *host_port, Error **errp) + return; + err: + error_propagate(errp, local_err); ++ if (rdma) { ++ g_free(rdma->host); ++ } + g_free(rdma); + g_free(rdma_return_path); + } +-- +2.23.0 diff --git a/migration-register_savevm_live-doesn-t-need-dev.patch b/migration-register_savevm_live-doesn-t-need-dev.patch new file mode 100644 index 0000000000000000000000000000000000000000..a980deccbcac98d709a35e62f41c7d52e39b0d11 --- /dev/null +++ b/migration-register_savevm_live-doesn-t-need-dev.patch @@ -0,0 +1,201 @@ +From 0f7cde69416f85ec3d3f57769ae38db3d72fda8c Mon Sep 17 00:00:00 2001 +From: "Dr. 
David Alan Gilbert" +Date: Thu, 22 Aug 2019 12:54:33 +0100 +Subject: [PATCH] migration: register_savevm_live doesn't need dev + +Commit 78dd48df3 removed the last caller of register_savevm_live for an +instantiable device (rather than a single system wide device); +so trim out the parameter. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20190822115433.12070-1-dgilbert@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Cornelia Huck +Signed-off-by: Dr. David Alan Gilbert +--- + docs/devel/migration.rst | 3 +-- + hw/ppc/spapr.c | 2 +- + hw/s390x/s390-skeys.c | 2 +- + hw/s390x/s390-stattrib.c | 2 +- + hw/s390x/tod.c | 2 +- + include/migration/register.h | 3 +-- + migration/block-dirty-bitmap.c | 2 +- + migration/block.c | 2 +- + migration/ram.c | 2 +- + migration/savevm.c | 23 +---------------------- + net/slirp.c | 2 +- + 11 files changed, 11 insertions(+), 34 deletions(-) + +diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst +index 220059679a..cc6f839fce 100644 +--- a/docs/devel/migration.rst ++++ b/docs/devel/migration.rst +@@ -183,8 +183,7 @@ another to load the state back. + + .. 
code:: c + +- int register_savevm_live(DeviceState *dev, +- const char *idstr, ++ int register_savevm_live(const char *idstr, + int instance_id, + int version_id, + SaveVMHandlers *ops, +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index b0f37c34a4..289967c3de 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3069,7 +3069,7 @@ static void spapr_machine_init(MachineState *machine) + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ + vmstate_register(NULL, 0, &vmstate_spapr, spapr); +- register_savevm_live(NULL, "spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, ++ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_htab_handlers, spapr); + + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), +diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c +index e5bd92c0c7..fb7d57865d 100644 +--- a/hw/s390x/s390-skeys.c ++++ b/hw/s390x/s390-skeys.c +@@ -388,7 +388,7 @@ static inline void s390_skeys_set_migration_enabled(Object *obj, bool value, + ss->migration_enabled = value; + + if (ss->migration_enabled) { +- register_savevm_live(NULL, TYPE_S390_SKEYS, 0, 1, ++ register_savevm_live(TYPE_S390_SKEYS, 0, 1, + &savevm_s390_storage_keys, ss); + } else { + unregister_savevm(DEVICE(ss), TYPE_S390_SKEYS, ss); +diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c +index 766f2015a4..5ee15d5e82 100644 +--- a/hw/s390x/s390-stattrib.c ++++ b/hw/s390x/s390-stattrib.c +@@ -382,7 +382,7 @@ static void s390_stattrib_instance_init(Object *obj) + { + S390StAttribState *sas = S390_STATTRIB(obj); + +- register_savevm_live(NULL, TYPE_S390_STATTRIB, 0, 0, ++ register_savevm_live(TYPE_S390_STATTRIB, 0, 0, + &savevm_s390_stattrib_handlers, sas); + + object_property_add_bool(obj, "migration-enabled", +diff --git a/hw/s390x/tod.c b/hw/s390x/tod.c +index a9fca8eb0b..d6b22bb966 100644 +--- a/hw/s390x/tod.c ++++ b/hw/s390x/tod.c +@@ -100,7 +100,7 @@ static void 
s390_tod_realize(DeviceState *dev, Error **errp) + S390TODState *td = S390_TOD(dev); + + /* Legacy migration interface */ +- register_savevm_live(NULL, "todclock", 0, 1, &savevm_tod, td); ++ register_savevm_live("todclock", 0, 1, &savevm_tod, td); + } + + static void s390_tod_class_init(ObjectClass *oc, void *data) +diff --git a/include/migration/register.h b/include/migration/register.h +index 8b2bc5b129..f3ba10b6ef 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -68,8 +68,7 @@ typedef struct SaveVMHandlers { + int (*resume_prepare)(MigrationState *s, void *opaque); + } SaveVMHandlers; + +-int register_savevm_live(DeviceState *dev, +- const char *idstr, ++int register_savevm_live(const char *idstr, + uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index 4a896a09eb..11e8feb595 100644 +--- a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -733,7 +733,7 @@ void dirty_bitmap_mig_init(void) + { + QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list); + +- register_savevm_live(NULL, "dirty-bitmap", 0, 1, ++ register_savevm_live("dirty-bitmap", 0, 1, + &savevm_dirty_bitmap_handlers, + &dirty_bitmap_mig_state); + } +diff --git a/migration/block.c b/migration/block.c +index 91f98ef44a..ec15d1d6b3 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -1030,6 +1030,6 @@ void blk_mig_init(void) + QSIMPLEQ_INIT(&block_mig_state.blk_list); + qemu_mutex_init(&block_mig_state.lock); + +- register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers, ++ register_savevm_live("block", 0, 1, &savevm_block_handlers, + &block_mig_state); + } +diff --git a/migration/ram.c b/migration/ram.c +index d6657a8093..2077ba5be4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -5125,5 +5125,5 @@ static SaveVMHandlers savevm_ram_handlers = { + void ram_mig_init(void) + { + qemu_mutex_init(&XBZRLE.lock); +- 
register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state); ++ register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state); + } +diff --git a/migration/savevm.c b/migration/savevm.c +index f0974380e5..cdb79222a4 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -683,8 +683,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) + of the system, so instance_id should be removed/replaced. + Meanwhile pass -1 as instance_id if you do not already have a clearly + distinguishing id for all instances of your device class. */ +-int register_savevm_live(DeviceState *dev, +- const char *idstr, ++int register_savevm_live(const char *idstr, + uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, +@@ -703,26 +702,6 @@ int register_savevm_live(DeviceState *dev, + se->is_ram = 1; + } + +- if (dev) { +- char *id = qdev_get_dev_path(dev); +- if (id) { +- if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >= +- sizeof(se->idstr)) { +- error_report("Path too long for VMState (%s)", id); +- g_free(id); +- g_free(se); +- +- return -1; +- } +- g_free(id); +- +- se->compat = g_new0(CompatEntry, 1); +- pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr); +- se->compat->instance_id = instance_id == -1 ? +- calculate_compat_instance_id(idstr) : instance_id; +- instance_id = -1; +- } +- } + pstrcat(se->idstr, sizeof(se->idstr), idstr); + + if (instance_id == VMSTATE_INSTANCE_ID_ANY) { +diff --git a/net/slirp.c b/net/slirp.c +index b34cb29276..f42f496641 100644 +--- a/net/slirp.c ++++ b/net/slirp.c +@@ -576,7 +576,7 @@ static int net_slirp_init(NetClientState *peer, const char *model, + * specific version? 
+ */ + g_assert(slirp_state_version() == 4); +- register_savevm_live(NULL, "slirp", 0, slirp_state_version(), ++ register_savevm_live("slirp", 0, slirp_state_version(), + &savevm_slirp_state, s->slirp); + + s->poll_notifier.notify = net_slirp_poll_notify; +-- +2.27.0 + diff --git a/migration-savevm-release-gslist-after-dump_vmstate_j.patch b/migration-savevm-release-gslist-after-dump_vmstate_j.patch new file mode 100644 index 0000000000000000000000000000000000000000..d5ec9b881005dc21ec927a9f4b37f57999c89c1f --- /dev/null +++ b/migration-savevm-release-gslist-after-dump_vmstate_j.patch @@ -0,0 +1,63 @@ +From 0d8c145e986d4f500f065d2d8645e95175324e62 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Wed, 19 Feb 2020 17:47:05 +0800 +Subject: [PATCH 8/9] migration/savevm: release gslist after dump_vmstate_json + +'list' forgot to free at the end of dump_vmstate_json_to_file(), although it's called only once, but seems like a clean code. + +Fix the leak as follow: +Direct leak of 16 byte(s) in 1 object(s) allocated from: + #0 0x7fb946abd768 in __interceptor_malloc (/lib64/libasan.so.5+0xef768) + #1 0x7fb945eca445 in g_malloc (/lib64/libglib-2.0.so.0+0x52445) + #2 0x7fb945ee2066 in g_slice_alloc (/lib64/libglib-2.0.so.0+0x6a066) + #3 0x7fb945ee3139 in g_slist_prepend (/lib64/libglib-2.0.so.0+0x6b139) + #4 0x5585db591581 in object_class_get_list_tramp /mnt/sdb/qemu-new/qemu/qom/object.c:1084 + #5 0x5585db590f66 in object_class_foreach_tramp /mnt/sdb/qemu-new/qemu/qom/object.c:1028 + #6 0x7fb945eb35f7 in g_hash_table_foreach (/lib64/libglib-2.0.so.0+0x3b5f7) + #7 0x5585db59110c in object_class_foreach /mnt/sdb/qemu-new/qemu/qom/object.c:1038 + #8 0x5585db5916b6 in object_class_get_list /mnt/sdb/qemu-new/qemu/qom/object.c:1092 + #9 0x5585db335ca0 in dump_vmstate_json_to_file /mnt/sdb/qemu-new/qemu/migration/savevm.c:638 + #10 0x5585daa5bcbf in main /mnt/sdb/qemu-new/qemu/vl.c:4420 + #11 0x7fb941204812 in __libc_start_main ../csu/libc-start.c:308 + #12 0x5585da29420d in 
_start (/mnt/sdb/qemu-new/qemu/build/x86_64-softmmu/qemu-system-x86_64+0x27f020d) + +Indirect leak of 7472 byte(s) in 467 object(s) allocated from: + #0 0x7fb946abd768 in __interceptor_malloc (/lib64/libasan.so.5+0xef768) + #1 0x7fb945eca445 in g_malloc (/lib64/libglib-2.0.so.0+0x52445) + #2 0x7fb945ee2066 in g_slice_alloc (/lib64/libglib-2.0.so.0+0x6a066) + #3 0x7fb945ee3139 in g_slist_prepend (/lib64/libglib-2.0.so.0+0x6b139) + #4 0x5585db591581 in object_class_get_list_tramp /mnt/sdb/qemu-new/qemu/qom/object.c:1084 + #5 0x5585db590f66 in object_class_foreach_tramp /mnt/sdb/qemu-new/qemu/qom/object.c:1028 + #6 0x7fb945eb35f7 in g_hash_table_foreach (/lib64/libglib-2.0.so.0+0x3b5f7) + #7 0x5585db59110c in object_class_foreach /mnt/sdb/qemu-new/qemu/qom/object.c:1038 + #8 0x5585db5916b6 in object_class_get_list /mnt/sdb/qemu-new/qemu/qom/object.c:1092 + #9 0x5585db335ca0 in dump_vmstate_json_to_file /mnt/sdb/qemu-new/qemu/migration/savevm.c:638 + #10 0x5585daa5bcbf in main /mnt/sdb/qemu-new/qemu/vl.c:4420 + #11 0x7fb941204812 in __libc_start_main ../csu/libc-start.c:308 + #12 0x5585da29420d in _start (/mnt/sdb/qemu-new/qemu/build/x86_64-softmmu/qemu-system-x86_64+0x27f020d) + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Reviewed-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Juan Quintela +Signed-off-by: AlexChen +--- + migration/savevm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/savevm.c b/migration/savevm.c +index 7d89c57..8163de7 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -614,6 +614,7 @@ void dump_vmstate_json_to_file(FILE *out_file) + } + fprintf(out_file, "\n}\n"); + fclose(out_file); ++ g_slist_free(list); + } + + static uint32_t calculate_new_instance_id(const char *idstr) +-- +1.8.3.1 + diff --git a/migration-tls-add-error-handling-in-multifd_tls_hand.patch b/migration-tls-add-error-handling-in-multifd_tls_hand.patch new file mode 100644 index 0000000000000000000000000000000000000000..de444af35d9713e092f98a89485a8a8c590a2203 --- /dev/null +++ b/migration-tls-add-error-handling-in-multifd_tls_hand.patch @@ -0,0 +1,42 @@ +From 4bf84b63bf1b2fba031fc6c3f4948785d534df3b Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Fri, 5 Mar 2021 16:10:57 +0800 +Subject: [PATCH] migration/tls: add error handling in + multifd_tls_handshake_thread + +If any error happens during multifd send thread creating (e.g. channel broke +because new domain is destroyed by the dst), multifd_tls_handshake_thread +may exit silently, leaving main migration thread hanging (ram_save_setup -> +multifd_send_sync_main -> qemu_sem_wait(&p->sem_sync)). +Fix that by adding error handling in multifd_tls_handshake_thread. 
+ +Signed-off-by: Hao Wang +--- + migration/ram.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 3338363e9d..d4ac696899 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1516,7 +1516,16 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + } else { + trace_multifd_tls_outgoing_handshake_complete(ioc); + } +- multifd_channel_connect(p, ioc, err); ++ ++ if (!multifd_channel_connect(p, ioc, err)) { ++ /* ++ * Error happen, mark multifd_send_thread status as 'quit' although it ++ * is not created, and then tell who pay attention to me. ++ */ ++ p->quit = true; ++ qemu_sem_post(&multifd_send_state->channels_ready); ++ qemu_sem_post(&p->sem_sync); ++ } + } + + static void *multifd_tls_handshake_thread(void *opaque) +-- +2.27.0 + diff --git a/migration-tls-add-support-for-multifd-tls-handshake.patch b/migration-tls-add-support-for-multifd-tls-handshake.patch new file mode 100644 index 0000000000000000000000000000000000000000..f81bb6194cdd98b8a83046cc6cfc831d108d5aae --- /dev/null +++ b/migration-tls-add-support-for-multifd-tls-handshake.patch @@ -0,0 +1,125 @@ +From e283c7dab15fed5af2904480230f86cf81b67aed Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 11:38:37 +0800 +Subject: [PATCH] migration/tls: add support for multifd tls-handshake +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Similar like migration main thread, we need to do handshake +for each multifd thread. + +Signed-off-by: Chuan Zheng +Signed-off-by: Yan Jin +Reviewed-by: Daniel P. Berrangé +Message-Id: <1600139042-104593-6-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/ram.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 75 insertions(+), 2 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 2b9d00745c..b82c0e6562 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -38,6 +38,7 @@ + #include "ram.h" + #include "migration.h" + #include "socket.h" ++#include "tls.h" + #include "migration/register.h" + #include "migration/misc.h" + #include "qemu-file.h" +@@ -1200,6 +1201,77 @@ out: + return NULL; + } + ++static bool multifd_channel_connect(MultiFDSendParams *p, ++ QIOChannel *ioc, ++ Error *error); ++ ++static void multifd_tls_outgoing_handshake(QIOTask *task, ++ gpointer opaque) ++{ ++ MultiFDSendParams *p = opaque; ++ QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); ++ Error *err = NULL; ++ ++ qio_task_propagate_error(task, &err); ++ multifd_channel_connect(p, ioc, err); ++} ++ ++static void multifd_tls_channel_connect(MultiFDSendParams *p, ++ QIOChannel *ioc, ++ Error **errp) ++{ ++ MigrationState *s = migrate_get_current(); ++ const char *hostname = p->tls_hostname; ++ QIOChannelTLS *tioc; ++ ++ tioc = migration_tls_client_create(s, ioc, hostname, errp); ++ if (!tioc) { ++ return; ++ } ++ ++ qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); ++ qio_channel_tls_handshake(tioc, ++ multifd_tls_outgoing_handshake, ++ p, ++ NULL, ++ NULL); ++ ++} ++ ++static bool multifd_channel_connect(MultiFDSendParams *p, ++ QIOChannel *ioc, ++ Error *error) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ if (!error) { ++ if (s->parameters.tls_creds && ++ *s->parameters.tls_creds && ++ !object_dynamic_cast(OBJECT(ioc), ++ TYPE_QIO_CHANNEL_TLS)) { ++ multifd_tls_channel_connect(p, ioc, &error); ++ if (!error) { ++ /* ++ * tls_channel_connect will call back to this ++ * function after the TLS handshake, ++ * so we mustn't call multifd_send_thread until then ++ */ ++ return false; ++ } else { ++ return true; ++ } ++ } else { ++ /* 
update for tls qio channel */ ++ p->c = ioc; ++ qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, ++ QEMU_THREAD_JOINABLE); ++ } ++ return false; ++ } ++ ++ return true; ++} ++ + static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, + QIOChannel *ioc, Error *err) + { +@@ -1229,8 +1301,9 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + p->c = QIO_CHANNEL(sioc); + qio_channel_set_delay(p->c, false); + p->running = true; +- qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, +- QEMU_THREAD_JOINABLE); ++ if (multifd_channel_connect(p, sioc, local_err)) { ++ goto cleanup; ++ } + return; + } + +-- +2.27.0 + diff --git a/migration-tls-add-tls_hostname-into-MultiFDSendParam.patch b/migration-tls-add-tls_hostname-into-MultiFDSendParam.patch new file mode 100644 index 0000000000000000000000000000000000000000..3b06a42fad428d20035072601afbcd139b77c291 --- /dev/null +++ b/migration-tls-add-tls_hostname-into-MultiFDSendParam.patch @@ -0,0 +1,66 @@ +From 0aff29297923b32e919ce944030a043e0826d9aa Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 11:25:44 +0800 +Subject: [PATCH] migration/tls: add tls_hostname into MultiFDSendParams +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Since multifd creation is async with migration_channel_connect, we should +pass the hostname from MigrationState to MultiFDSendParams. + +Signed-off-by: Chuan Zheng +Signed-off-by: Yan Jin +Message-Id: <1600139042-104593-4-git-send-email-zhengchuan@huawei.com> +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/ram.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 1a33c7b3e2..bb8f383c3b 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -621,6 +621,8 @@ typedef struct { + uint8_t id; + /* channel thread name */ + char *name; ++ /* tls hostname */ ++ char *tls_hostname; + /* channel thread id */ + QemuThread thread; + /* communication channel */ +@@ -1041,6 +1043,8 @@ void multifd_save_cleanup(void) + qemu_sem_destroy(&p->sem_sync); + g_free(p->name); + p->name = NULL; ++ g_free(p->tls_hostname); ++ p->tls_hostname = NULL; + multifd_pages_clear(p->pages); + p->pages = NULL; + p->packet_len = 0; +@@ -1229,10 +1233,12 @@ int multifd_save_setup(void) + int thread_count; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + uint8_t i; ++ MigrationState *s; + + if (!migrate_use_multifd()) { + return 0; + } ++ s = migrate_get_current(); + thread_count = migrate_multifd_channels(); + multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); +@@ -1253,6 +1259,7 @@ int multifd_save_setup(void) + + sizeof(ram_addr_t) * page_count; + p->packet = g_malloc0(p->packet_len); + p->name = g_strdup_printf("multifdsend_%d", i); ++ p->tls_hostname = g_strdup(s->hostname); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + return 0; +-- +2.27.0 + diff --git a/migration-tls-add-trace-points-for-multifd-tls.patch b/migration-tls-add-trace-points-for-multifd-tls.patch new file mode 100644 index 0000000000000000000000000000000000000000..a49ef1faad30e725b45c01e10812df9b7b72b7b3 --- /dev/null +++ b/migration-tls-add-trace-points-for-multifd-tls.patch @@ -0,0 +1,73 @@ +From 83cbd3a645e9376a25cd359e8f12f8db025bf071 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 13:56:11 +0800 +Subject: [PATCH] migration/tls: add trace points for multifd-tls +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +add trace points for multifd-tls for debug. + +Signed-off-by: Chuan Zheng +Signed-off-by: Yan Jin +Reviewed-by: Daniel P. Berrangé +Message-Id: <1600139042-104593-7-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. David Alan Gilbert +--- + migration/ram.c | 10 +++++++++- + migration/trace-events | 4 ++++ + 2 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index b82c0e6562..3ded38c0be 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1212,7 +1212,11 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); + Error *err = NULL; + +- qio_task_propagate_error(task, &err); ++ if (qio_task_propagate_error(task, &err)) { ++ trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); ++ } else { ++ trace_multifd_tls_outgoing_handshake_complete(ioc); ++ } + multifd_channel_connect(p, ioc, err); + } + +@@ -1229,6 +1233,7 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, + return; + } + ++ trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); + qio_channel_tls_handshake(tioc, + multifd_tls_outgoing_handshake, +@@ -1244,6 +1249,9 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + { + MigrationState *s = migrate_get_current(); + ++ trace_multifd_set_outgoing_channel( ++ ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); ++ + if (!error) { + if (s->parameters.tls_creds && + *s->parameters.tls_creds && +diff --git a/migration/trace-events b/migration/trace-events +index 69620c43c2..c0640cd424 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -93,6 +93,10 @@ multifd_send_sync_main_signal(uint8_t id) "channel %d" + multifd_send_sync_main_wait(uint8_t id) "channel %d" + multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) 
"channel %d packets %" PRIu64 " pages %" PRIu64 + multifd_send_thread_start(uint8_t id) "%d" ++multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" ++multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" ++multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" ++multifd_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname, void *err) "ioc=%p ioctype=%s hostname=%s err=%p" + ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: start: %" PRIx64 " %zx" + ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: addr: 0x%" PRIx64 " flags: 0x%x host: %p" + ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x" +-- +2.27.0 + diff --git a/migration-tls-extract-cleanup-function-for-common-us.patch b/migration-tls-extract-cleanup-function-for-common-us.patch new file mode 100644 index 0000000000000000000000000000000000000000..5ac83e9200020300060317065bfbfeca0ebf84e2 --- /dev/null +++ b/migration-tls-extract-cleanup-function-for-common-us.patch @@ -0,0 +1,82 @@ +From 29914b97b20a6415476095c913607412a3f7572f Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 11:32:44 +0800 +Subject: [PATCH] migration/tls: extract cleanup function for common-use +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +multifd channel cleanup is need if multifd handshake failed, +let's extract it. + +Signed-off-by: Chuan Zheng +Signed-off-by: Yan Jin +Reviewed-by: Daniel P. Berrangé +Message-Id: <1600139042-104593-5-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/ram.c | 34 ++++++++++++++++++++++------------ + 1 file changed, 22 insertions(+), 12 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index bb8f383c3b..2b9d00745c 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1200,6 +1200,23 @@ out: + return NULL; + } + ++static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, ++ QIOChannel *ioc, Error *err) ++{ ++ migrate_set_error(migrate_get_current(), err); ++ /* Error happen, we need to tell who pay attention to me */ ++ qemu_sem_post(&multifd_send_state->channels_ready); ++ qemu_sem_post(&p->sem_sync); ++ /* ++ * Although multifd_send_thread is not created, but main migration ++ * thread neet to judge whether it is running, so we need to mark ++ * its status. ++ */ ++ p->quit = true; ++ object_unref(OBJECT(ioc)); ++ error_free(err); ++} ++ + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + { + MultiFDSendParams *p = opaque; +@@ -1207,25 +1224,18 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + Error *local_err = NULL; + + if (qio_task_propagate_error(task, &local_err)) { +- migrate_set_error(migrate_get_current(), local_err); +- /* Error happen, we need to tell who pay attention to me */ +- qemu_sem_post(&multifd_send_state->channels_ready); +- qemu_sem_post(&p->sem_sync); +- /* +- * Although multifd_send_thread is not created, but main migration +- * thread neet to judge whether it is running, so we need to mark +- * its status. 
+- */ +- p->quit = true; +- object_unref(OBJECT(sioc)); +- error_free(local_err); ++ goto cleanup; + } else { + p->c = QIO_CHANNEL(sioc); + qio_channel_set_delay(p->c, false); + p->running = true; + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); ++ return; + } ++ ++cleanup: ++ multifd_new_send_channel_cleanup(p, sioc, local_err); + } + + int multifd_save_setup(void) +-- +2.27.0 + diff --git a/migration-tls-extract-migration_tls_client_create-fo.patch b/migration-tls-extract-migration_tls_client_create-fo.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f538332241d60313f87533cff62785c11e98f39 --- /dev/null +++ b/migration-tls-extract-migration_tls_client_create-fo.patch @@ -0,0 +1,109 @@ +From 4ffa2ea3749066a0444b69ef16ec4e4d6cdad0e1 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Tue, 15 Sep 2020 11:03:58 +0800 +Subject: [PATCH] migration/tls: extract migration_tls_client_create for + common-use +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +migration_tls_client_create will be used in multifd-tls, let's +extract it. + +Signed-off-by: Chuan Zheng +Signed-off-by: Yan Jin +Reviewed-by: Daniel P. Berrangé +Message-Id: <1600139042-104593-3-git-send-email-zhengchuan@huawei.com> +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/tls.c | 26 ++++++++++++++++++-------- + migration/tls.h | 6 ++++++ + 2 files changed, 24 insertions(+), 8 deletions(-) + +diff --git a/migration/tls.c b/migration/tls.c +index a0eb553e14..1d5b571d8e 100644 +--- a/migration/tls.c ++++ b/migration/tls.c +@@ -22,7 +22,6 @@ + #include "channel.h" + #include "migration.h" + #include "tls.h" +-#include "io/channel-tls.h" + #include "crypto/tlscreds.h" + #include "qemu/error-report.h" + #include "qapi/error.h" +@@ -126,11 +125,10 @@ static void migration_tls_outgoing_handshake(QIOTask *task, + object_unref(OBJECT(ioc)); + } + +- +-void migration_tls_channel_connect(MigrationState *s, +- QIOChannel *ioc, +- const char *hostname, +- Error **errp) ++QIOChannelTLS *migration_tls_client_create(MigrationState *s, ++ QIOChannel *ioc, ++ const char *hostname, ++ Error **errp) + { + QCryptoTLSCreds *creds; + QIOChannelTLS *tioc; +@@ -138,7 +136,7 @@ void migration_tls_channel_connect(MigrationState *s, + creds = migration_tls_get_creds( + s, QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT, errp); + if (!creds) { +- return; ++ return NULL; + } + + if (s->parameters.tls_hostname && *s->parameters.tls_hostname) { +@@ -146,11 +144,23 @@ void migration_tls_channel_connect(MigrationState *s, + } + if (!hostname) { + error_setg(errp, "No hostname available for TLS"); +- return; ++ return NULL; + } + + tioc = qio_channel_tls_new_client( + ioc, creds, hostname, errp); ++ ++ return tioc; ++} ++ ++void migration_tls_channel_connect(MigrationState *s, ++ QIOChannel *ioc, ++ const char *hostname, ++ Error **errp) ++{ ++ QIOChannelTLS *tioc; ++ ++ tioc = migration_tls_client_create(s, ioc, hostname, errp); + if (!tioc) { + return; + } +diff --git a/migration/tls.h b/migration/tls.h +index cdd70001ed..0cfbe368ba 100644 +--- a/migration/tls.h ++++ b/migration/tls.h +@@ -22,11 +22,17 @@ + #define QEMU_MIGRATION_TLS_H + + #include "io/channel.h" ++#include "io/channel-tls.h" + + void 
migration_tls_channel_process_incoming(MigrationState *s, + QIOChannel *ioc, + Error **errp); + ++QIOChannelTLS *migration_tls_client_create(MigrationState *s, ++ QIOChannel *ioc, ++ const char *hostname, ++ Error **errp); ++ + void migration_tls_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, +-- +2.27.0 + diff --git a/migration-tls-fix-inverted-semantics-in-multifd_chan.patch b/migration-tls-fix-inverted-semantics-in-multifd_chan.patch new file mode 100644 index 0000000000000000000000000000000000000000..3f5a52aa6db6d5ecd034a1fc2f98c994b84ade64 --- /dev/null +++ b/migration-tls-fix-inverted-semantics-in-multifd_chan.patch @@ -0,0 +1,55 @@ +From ee0d1b508a144ab390fb7bc8b7a4fe3161aebecf Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Fri, 5 Mar 2021 16:09:29 +0800 +Subject: [PATCH] migration/tls: fix inverted semantics in + multifd_channel_connect + +Function multifd_channel_connect() return "true" to indicate failure, +which is rather confusing. Fix that. 
+ +Signed-off-by: Hao Wang +--- + migration/ram.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index ba1e729c39..3338363e9d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1575,9 +1575,9 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + * function after the TLS handshake, + * so we mustn't call multifd_send_thread until then + */ +- return false; +- } else { + return true; ++ } else { ++ return false; + } + } else { + /* update for tls qio channel */ +@@ -1585,10 +1585,10 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); + } +- return false; ++ return true; + } + +- return true; ++ return false; + } + + static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, +@@ -1620,7 +1620,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + p->c = QIO_CHANNEL(sioc); + qio_channel_set_delay(p->c, false); + p->running = true; +- if (multifd_channel_connect(p, sioc, local_err)) { ++ if (!multifd_channel_connect(p, sioc, local_err)) { + goto cleanup; + } + return; +-- +2.27.0 + diff --git a/migration-tls-save-hostname-into-MigrationState.patch b/migration-tls-save-hostname-into-MigrationState.patch new file mode 100644 index 0000000000000000000000000000000000000000..538a8f69179d536f2f5bc307a66d4c900c5fd790 --- /dev/null +++ b/migration-tls-save-hostname-into-MigrationState.patch @@ -0,0 +1,77 @@ +From 08ae1eda02ff08b3431b227ed702ea0fc5f8a4a2 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Tue, 15 Sep 2020 11:03:57 +0800 +Subject: [PATCH] migration/tls: save hostname into MigrationState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +hostname is need in multifd-tls, save hostname into MigrationState. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Yan Jin +Message-Id: <1600139042-104593-2-git-send-email-zhengchuan@huawei.com> +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +--- + migration/channel.c | 1 + + migration/migration.c | 1 + + migration/migration.h | 5 +++++ + migration/tls.c | 2 ++ + 4 files changed, 9 insertions(+) + +diff --git a/migration/channel.c b/migration/channel.c +index 7462181484..46ed40b89c 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -99,5 +99,6 @@ void migration_channel_connect(MigrationState *s, + } + } + migrate_fd_connect(s, error); ++ g_free(s->hostname); + error_free(error); + } +diff --git a/migration/migration.c b/migration/migration.c +index 7949f2a40b..993d77b7d6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1710,6 +1710,7 @@ void migrate_init(MigrationState *s) + s->migration_thread_running = false; + error_free(s->error); + s->error = NULL; ++ s->hostname = NULL; + + migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); + +diff --git a/migration/migration.h b/migration/migration.h +index feb344306a..e5aaf2ef70 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -259,6 +259,11 @@ struct MigrationState + * (which is in 4M chunk). 
+ */ + uint8_t clear_bitmap_shift; ++ ++ /* ++ * This save hostname when out-going migration starts ++ */ ++ char *hostname; + }; + + void migrate_set_state(int *state, int old_state, int new_state); +diff --git a/migration/tls.c b/migration/tls.c +index 5171afc6c4..a0eb553e14 100644 +--- a/migration/tls.c ++++ b/migration/tls.c +@@ -155,6 +155,8 @@ void migration_tls_channel_connect(MigrationState *s, + return; + } + ++ /* Save hostname into MigrationState for handshake */ ++ s->hostname = g_strdup(hostname); + trace_migration_tls_outgoing_handshake_start(hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "migration-tls-outgoing"); + qio_channel_tls_handshake(tioc, +-- +2.27.0 + diff --git a/migration-update-ram_counters-for-multifd-sync-packe.patch b/migration-update-ram_counters-for-multifd-sync-packe.patch new file mode 100644 index 0000000000000000000000000000000000000000..838380403f1ac31df0f2befd62a14711cee71e58 --- /dev/null +++ b/migration-update-ram_counters-for-multifd-sync-packe.patch @@ -0,0 +1,35 @@ +From e93040851d683f1f7750acfa0e862b4405678f24 Mon Sep 17 00:00:00 2001 +From: Zheng Chuan +Date: Fri, 24 Apr 2020 11:50:41 +0800 +Subject: [PATCH 04/10] migration: update ram_counters for multifd sync packet + +Multifd sync will send MULTIFD_FLAG_SYNC flag info to destination, add +these bytes to ram_counters record. + +Change-Id: I885166f412f58e74de40ea6ffec1c35e82ae4619 +Signed-off-by: Ivan Ren +Suggested-by: Wei Yang +Message-Id: <1564464816-21804-4-git-send-email-ivanren@tencent.com> +Reviewed-by: Juan Quintela +Signed-off-by: Dr. 
David Alan Gilbert +--- + migration/ram.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 88ddd2bb..c75716bb 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1085,6 +1085,10 @@ static void multifd_send_sync_main(RAMState *rs) + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; + qemu_file_update_transfer(rs->f, p->packet_len); ++ ram_counters.multifd_bytes += p->packet_len; ++ ram_counters.transferred += p->packet_len; ++ ram_counters.multifd_bytes += p->packet_len; ++ ram_counters.transferred += p->packet_len; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +-- +2.19.1 diff --git a/migration-use-migration_is_active-to-represent-activ.patch b/migration-use-migration_is_active-to-represent-activ.patch new file mode 100644 index 0000000000000000000000000000000000000000..c9e926ad8e16dcfe9c931fae96cc9b8ad8e0cb93 --- /dev/null +++ b/migration-use-migration_is_active-to-represent-activ.patch @@ -0,0 +1,68 @@ +From 9662d44633dd4582dc47d58f63ee63b2c8f60a4f Mon Sep 17 00:00:00 2001 +From: Wei Yang +Date: Wed, 17 Jul 2019 08:53:41 +0800 +Subject: [PATCH] migration: use migration_is_active to represent active state + +Wrap the check into a function to make it easy to read. + +Signed-off-by: Wei Yang +Message-Id: <20190717005341.14140-1-richardw.yang@linux.intel.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +--- + include/migration/misc.h | 1 + + migration/migration.c | 12 ++++++++---- + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/include/migration/misc.h b/include/migration/misc.h +index 5cdbabd094..42d6abc920 100644 +--- a/include/migration/misc.h ++++ b/include/migration/misc.h +@@ -61,6 +61,7 @@ void migration_object_init(void); + void migration_shutdown(void); + void qemu_start_incoming_migration(const char *uri, Error **errp); + bool migration_is_idle(void); ++bool migration_is_active(MigrationState *); + void add_migration_state_change_notifier(Notifier *notify); + void remove_migration_state_change_notifier(Notifier *notify); + bool migration_in_setup(MigrationState *); +diff --git a/migration/migration.c b/migration/migration.c +index 9b40380d7c..fd7d81d4b6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1578,8 +1578,7 @@ static void migrate_fd_cleanup(MigrationState *s) + qemu_fclose(tmp); + } + +- assert((s->state != MIGRATION_STATUS_ACTIVE) && +- (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE)); ++ assert(!migration_is_active(s)); + + if (s->state == MIGRATION_STATUS_CANCELLING) { + migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING, +@@ -1741,6 +1740,12 @@ bool migration_is_idle(void) + return false; + } + ++bool migration_is_active(MigrationState *s) ++{ ++ return (s->state == MIGRATION_STATUS_ACTIVE || ++ s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE); ++} ++ + void migrate_init(MigrationState *s) + { + /* +@@ -3307,8 +3312,7 @@ static void *migration_thread(void *opaque) + + trace_migration_thread_setup_complete(); + +- while (s->state == MIGRATION_STATUS_ACTIVE || +- s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) { ++ while (migration_is_active(s)) { + int64_t current_time; + + if (urgent || !qemu_file_rate_limit(s->to_dst_file)) { +-- +2.27.0 + diff --git a/mirror-Fix-bdrv_has_zero_init-use.patch b/mirror-Fix-bdrv_has_zero_init-use.patch new file mode 100644 index 
0000000000000000000000000000000000000000..54fde6927f378bdddbe92495f8b5616dfd3f6953 --- /dev/null +++ b/mirror-Fix-bdrv_has_zero_init-use.patch @@ -0,0 +1,205 @@ +From 7fcb1c1a956a8cad5c2e8585e53878edc4fd9eca Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Wed, 24 Jul 2019 19:12:30 +0200 +Subject: [PATCH] mirror: Fix bdrv_has_zero_init() use + +bdrv_has_zero_init() only has meaning for newly created images or image +areas. If the mirror job itself did not create the image, it cannot +rely on bdrv_has_zero_init()'s result to carry any meaning. + +This is the case for drive-mirror with mode=existing and always for +blockdev-mirror. + +Note that we only have to zero-initialize the target with sync=full, +because other modes actually do not promise that the target will contain +the same data as the source after the job -- sync=top only promises to +copy anything allocated in the top layer, and sync=none will only copy +new I/O. (Which is how mirror has always handled it.) + +Signed-off-by: Max Reitz +Message-id: 20190724171239.8764-3-mreitz@redhat.com +Reviewed-by: Maxim Levitsky +Signed-off-by: Max Reitz +--- + block/mirror.c | 11 ++++++++--- + blockdev.c | 16 +++++++++++++--- + include/block/block_int.h | 2 ++ + tests/test-block-iothread.c | 2 +- + 4 files changed, 24 insertions(+), 7 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index ccae49a28e..89a053b265 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -51,6 +51,8 @@ typedef struct MirrorBlockJob { + Error *replace_blocker; + bool is_none_mode; + BlockMirrorBackingMode backing_mode; ++ /* Whether the target image requires explicit zero-initialization */ ++ bool zero_target; + MirrorCopyMode copy_mode; + BlockdevOnError on_source_error, on_target_error; + bool synced; +@@ -779,7 +781,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) + int ret; + int64_t count; + +- if (base == NULL && !bdrv_has_zero_init(target_bs)) { ++ if (s->zero_target) { + if 
(!bdrv_can_write_zeroes_with_unmap(target_bs)) { + bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length); + return 0; +@@ -1531,6 +1533,7 @@ static BlockJob *mirror_start_job( + const char *replaces, int64_t speed, + uint32_t granularity, int64_t buf_size, + BlockMirrorBackingMode backing_mode, ++ bool zero_target, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, +@@ -1658,6 +1661,7 @@ static BlockJob *mirror_start_job( + s->on_target_error = on_target_error; + s->is_none_mode = is_none_mode; + s->backing_mode = backing_mode; ++ s->zero_target = zero_target; + s->copy_mode = copy_mode; + s->base = base; + s->granularity = granularity; +@@ -1762,6 +1766,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, + int creation_flags, int64_t speed, + uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, ++ bool zero_target, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, const char *filter_node_name, +@@ -1779,7 +1784,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, + is_none_mode = mode == MIRROR_SYNC_MODE_NONE; + base = mode == MIRROR_SYNC_MODE_TOP ? 
backing_bs(bs) : NULL; + mirror_start_job(job_id, bs, creation_flags, target, replaces, +- speed, granularity, buf_size, backing_mode, ++ speed, granularity, buf_size, backing_mode, zero_target, + on_source_error, on_target_error, unmap, NULL, NULL, + &mirror_job_driver, is_none_mode, base, false, + filter_node_name, true, copy_mode, errp); +@@ -1806,7 +1811,7 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, + + ret = mirror_start_job( + job_id, bs, creation_flags, base, NULL, speed, 0, 0, +- MIRROR_LEAVE_BACKING_CHAIN, ++ MIRROR_LEAVE_BACKING_CHAIN, false, + on_error, on_error, true, cb, opaque, + &commit_active_job_driver, false, base, auto_complete, + filter_node_name, false, MIRROR_COPY_MODE_BACKGROUND, +diff --git a/blockdev.c b/blockdev.c +index 94e5aee30b..4435795b6d 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3739,6 +3739,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + bool has_replaces, const char *replaces, + enum MirrorSyncMode sync, + BlockMirrorBackingMode backing_mode, ++ bool zero_target, + bool has_speed, int64_t speed, + bool has_granularity, uint32_t granularity, + bool has_buf_size, int64_t buf_size, +@@ -3847,7 +3848,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + */ + mirror_start(job_id, bs, target, + has_replaces ? 
replaces : NULL, job_flags, +- speed, granularity, buf_size, sync, backing_mode, ++ speed, granularity, buf_size, sync, backing_mode, zero_target, + on_source_error, on_target_error, unmap, filter_node_name, + copy_mode, errp); + } +@@ -3863,6 +3864,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + int flags; + int64_t size; + const char *format = arg->format; ++ bool zero_target; + int ret; + + bs = qmp_get_root_bs(arg->device, errp); +@@ -3964,6 +3966,10 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + goto out; + } + ++ zero_target = (arg->sync == MIRROR_SYNC_MODE_FULL && ++ (arg->mode == NEW_IMAGE_MODE_EXISTING || ++ !bdrv_has_zero_init(target_bs))); ++ + ret = bdrv_try_set_aio_context(target_bs, aio_context, errp); + if (ret < 0) { + bdrv_unref(target_bs); +@@ -3972,7 +3978,8 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + + blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs, + arg->has_replaces, arg->replaces, arg->sync, +- backing_mode, arg->has_speed, arg->speed, ++ backing_mode, zero_target, ++ arg->has_speed, arg->speed, + arg->has_granularity, arg->granularity, + arg->has_buf_size, arg->buf_size, + arg->has_on_source_error, arg->on_source_error, +@@ -4012,6 +4019,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + AioContext *aio_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; + Error *local_err = NULL; ++ bool zero_target; + int ret; + + bs = qmp_get_root_bs(device, errp); +@@ -4024,6 +4032,8 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + return; + } + ++ zero_target = (sync == MIRROR_SYNC_MODE_FULL); ++ + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + +@@ -4034,7 +4044,7 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, + + blockdev_mirror_common(has_job_id ? 
job_id : NULL, bs, target_bs, + has_replaces, replaces, sync, backing_mode, +- has_speed, speed, ++ zero_target, has_speed, speed, + has_granularity, granularity, + has_buf_size, buf_size, + has_on_source_error, on_source_error, +diff --git a/include/block/block_int.h b/include/block/block_int.h +index 76117a761a..154b9b5501 100644 +--- a/include/block/block_int.h ++++ b/include/block/block_int.h +@@ -1120,6 +1120,7 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs, + * @buf_size: The amount of data that can be in flight at one time. + * @mode: Whether to collapse all images in the chain to the target. + * @backing_mode: How to establish the target's backing chain after completion. ++ * @zero_target: Whether the target should be explicitly zero-initialized + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @unmap: Whether to unmap target where source sectors only contain zeroes. 
+@@ -1139,6 +1140,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, + int creation_flags, int64_t speed, + uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, ++ bool zero_target, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, const char *filter_node_name, +diff --git a/tests/test-block-iothread.c b/tests/test-block-iothread.c +index 1949d5e61a..debfb69bfb 100644 +--- a/tests/test-block-iothread.c ++++ b/tests/test-block-iothread.c +@@ -611,7 +611,7 @@ static void test_propagate_mirror(void) + + /* Start a mirror job */ + mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, +- MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, ++ MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, + BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, + false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, + &error_abort); +-- +2.27.0 + diff --git a/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch b/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch new file mode 100644 index 0000000000000000000000000000000000000000..52f07f951aef929e9fe58740955f97af25e5ba85 --- /dev/null +++ b/mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch @@ -0,0 +1,52 @@ +From e092a17d3825a8f2c93cb429aaa5d857b579b64c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 22 Jul 2019 17:44:27 +0200 +Subject: [PATCH] mirror: Keep mirror_top_bs drained after dropping permissions + +mirror_top_bs is currently implicitly drained through its connection to +the source or the target node. 
However, the drain section for target_bs +ends early after moving mirror_top_bs from src to target_bs, so that +requests can already be restarted while mirror_top_bs is still present +in the chain, but has dropped all permissions and therefore runs into an +assertion failure like this: + + qemu-system-x86_64: block/io.c:1634: bdrv_co_write_req_prepare: + Assertion `child->perm & BLK_PERM_WRITE' failed. + +Keep mirror_top_bs drained until all graph changes have completed. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Reviewed-by: Max Reitz +(cherry picked from commit d2da5e288a2e71e82866c8fdefd41b5727300124) +Signed-off-by: Michael Roth +--- + block/mirror.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 0e3f7923cf..681b305de6 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -661,7 +661,10 @@ static int mirror_exit_common(Job *job) + s->target = NULL; + + /* We don't access the source any more. Dropping any WRITE/RESIZE is +- * required before it could become a backing file of target_bs. */ ++ * required before it could become a backing file of target_bs. Not having ++ * these permissions any more means that we can't allow any new requests on ++ * mirror_top_bs from now on, so keep it drained. 
*/ ++ bdrv_drained_begin(mirror_top_bs); + bs_opaque->stop = true; + bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, + &error_abort); +@@ -729,6 +732,7 @@ static int mirror_exit_common(Job *job) + bs_opaque->job = NULL; + + bdrv_drained_end(src); ++ bdrv_drained_end(mirror_top_bs); + s->in_drain = false; + bdrv_unref(mirror_top_bs); + bdrv_unref(src); +-- +2.23.0 diff --git a/mirror-Make-sure-that-source-and-target-size-match.patch b/mirror-Make-sure-that-source-and-target-size-match.patch new file mode 100644 index 0000000000000000000000000000000000000000..5e4edd26ce42f4507240fe806ed7a2ce57364a41 --- /dev/null +++ b/mirror-Make-sure-that-source-and-target-size-match.patch @@ -0,0 +1,89 @@ +From 9f57569d541acaa4a76513d09ede7d2b19aa69ea Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 3 Jun 2020 16:03:24 +0100 +Subject: [PATCH] mirror: Make sure that source and target size match + +RH-Author: Kevin Wolf +Message-id: <20200603160325.67506-11-kwolf@redhat.com> +Patchwork-id: 97110 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH v2 10/11] mirror: Make sure that source and target size match +Bugzilla: 1778593 +RH-Acked-by: Eric Blake +RH-Acked-by: Max Reitz +RH-Acked-by: Stefano Garzarella + +If the target is shorter than the source, mirror would copy data until +it reaches the end of the target and then fail with an I/O error when +trying to write past the end. + +If the target is longer than the source, the mirror job would complete +successfully, but the target wouldn't actually be an accurate copy of +the source image (it would contain some additional garbage at the end). + +Fix this by checking that both images have the same size when the job +starts. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Eric Blake +Message-Id: <20200511135825.219437-4-kwolf@redhat.com> +Reviewed-by: Max Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit e83dd6808c6e0975970f37b49b27cc37bb54eea8) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. de Paula +--- + block/mirror.c | 21 ++++++++++++--------- + 1 file changed, 12 insertions(+), 9 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index ef6c958ff9..8f0d4544d8 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -853,6 +853,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + BlockDriverState *target_bs = blk_bs(s->target); + bool need_drain = true; + int64_t length; ++ int64_t target_length; + BlockDriverInfo bdi; + char backing_filename[2]; /* we only need 2 characters because we are only + checking for a NULL string */ +@@ -868,24 +869,26 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + goto immediate_exit; + } + ++ target_length = blk_getlength(s->target); ++ if (target_length < 0) { ++ ret = target_length; ++ goto immediate_exit; ++ } ++ + /* Active commit must resize the base image if its size differs from the + * active layer. 
*/ + if (s->base == blk_bs(s->target)) { +- int64_t base_length; +- +- base_length = blk_getlength(s->target); +- if (base_length < 0) { +- ret = base_length; +- goto immediate_exit; +- } +- +- if (s->bdev_length > base_length) { ++ if (s->bdev_length > target_length) { + ret = blk_truncate(s->target, s->bdev_length, PREALLOC_MODE_OFF, + NULL); + if (ret < 0) { + goto immediate_exit; + } + } ++ } else if (s->bdev_length != target_length) { ++ error_setg(errp, "Source and target image have different sizes"); ++ ret = -EINVAL; ++ goto immediate_exit; + } + + if (s->bdev_length == 0) { +-- +2.27.0 + diff --git a/mirror-Wait-only-for-in-flight-operations.patch b/mirror-Wait-only-for-in-flight-operations.patch new file mode 100644 index 0000000000000000000000000000000000000000..d1b00c059a7afc7d25a7ec53f18cae8dbcdad85f --- /dev/null +++ b/mirror-Wait-only-for-in-flight-operations.patch @@ -0,0 +1,83 @@ +From b4e1ea1c59e4dd8cc95b97ccc4eb1d3957fe5489 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 26 Mar 2020 16:36:28 +0100 +Subject: [PATCH] mirror: Wait only for in-flight operations + +mirror_wait_for_free_in_flight_slot() just picks a random operation to +wait for. However, a MirrorOp is already in s->ops_in_flight when +mirror_co_read() waits for free slots, so if not enough slots are +immediately available, an operation can end up waiting for itself, or +two or more operations can wait for each other to complete, which +results in a hang. + +Fix this by adding a flag to MirrorOp that tells us if the request is +already in flight (and therefore occupies slots that it will later +free), and picking only such operations for waiting. 
+ +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 +Signed-off-by: Kevin Wolf +Message-Id: <20200326153628.4869-3-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +--- + block/mirror.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 8f0d4544d8..abcf60a961 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -100,6 +100,7 @@ struct MirrorOp { + + bool is_pseudo_op; + bool is_active_write; ++ bool is_in_flight; + CoQueue waiting_requests; + + QTAILQ_ENTRY(MirrorOp) next; +@@ -290,7 +291,9 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + * caller of this function. Since there is only one pseudo op + * at any given time, we will always find some real operation + * to wait on. */ +- if (!op->is_pseudo_op && op->is_active_write == active) { ++ if (!op->is_pseudo_op && op->is_in_flight && ++ op->is_active_write == active) ++ { + qemu_co_queue_wait(&op->waiting_requests, NULL); + return; + } +@@ -364,6 +367,7 @@ static void coroutine_fn mirror_co_read(void *opaque) + /* Copy the dirty cluster. */ + s->in_flight++; + s->bytes_in_flight += op->bytes; ++ op->is_in_flight = true; + trace_mirror_one_iteration(s, op->offset, op->bytes); + + ret = bdrv_co_preadv(s->mirror_top_bs->backing, op->offset, op->bytes, +@@ -379,6 +383,7 @@ static void coroutine_fn mirror_co_zero(void *opaque) + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; ++ op->is_in_flight = true; + + ret = blk_co_pwrite_zeroes(op->s->target, op->offset, op->bytes, + op->s->unmap ? 
BDRV_REQ_MAY_UNMAP : 0); +@@ -393,6 +398,7 @@ static void coroutine_fn mirror_co_discard(void *opaque) + op->s->in_flight++; + op->s->bytes_in_flight += op->bytes; + *op->bytes_handled = op->bytes; ++ op->is_in_flight = true; + + ret = blk_co_pdiscard(op->s->target, op->offset, op->bytes); + mirror_write_complete(op, ret); +@@ -1305,6 +1311,7 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, + .offset = offset, + .bytes = bytes, + .is_active_write = true, ++ .is_in_flight = true, + }; + qemu_co_queue_init(&op->waiting_requests); + QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); +-- +2.27.0 + diff --git a/monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch b/monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch index 51f4113b4a67e8c9dfd67d5f21011ee998e04d13..791449b59540fd0d66cac0c367af8436abb55741 100644 --- a/monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch +++ b/monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch @@ -1,6 +1,6 @@ -From 6f7a7f18f4460b0891eabbe1ca69e599216427b7 Mon Sep 17 00:00:00 2001 +From 117082ef493e62e6e2cd972b309e0cd72682ab02 Mon Sep 17 00:00:00 2001 From: Chen Qun -Date: Mon, 16 Mar 2020 14:26:06 +0800 +Date: Tue, 14 Apr 2020 19:50:59 +0800 Subject: [PATCH] moniter: fix memleak in monitor_fdset_dup_fd_find_remove When remove dup_fd in monitor_fdset_dup_fd_find_remove function, @@ -25,14 +25,14 @@ Reported-by: Euler Robot Signed-off-by: Chen Qun (cherry picked from commit a661614de18c89f58cad3fc1bb8aab44e820183a) --- - monitor.c | 1 + + monitor/misc.c | 1 + 1 file changed, 1 insertion(+) -diff --git a/monitor.c b/monitor.c -index 4807bbe..b5b15b5 100644 ---- a/monitor.c -+++ b/monitor.c -@@ -2596,6 +2596,7 @@ static int monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove) +diff --git a/monitor/misc.c b/monitor/misc.c +index 00338c00..0d6369ba 100644 +--- a/monitor/misc.c ++++ b/monitor/misc.c +@@ -1746,6 +1746,7 @@ static int64_t monitor_fdset_dup_fd_find_remove(int dup_fd, bool remove) 
if (mon_fdset_fd_dup->fd == dup_fd) { if (remove) { QLIST_REMOVE(mon_fdset_fd_dup, next); @@ -40,6 +40,5 @@ index 4807bbe..b5b15b5 100644 if (QLIST_EMPTY(&mon_fdset->dup_fds)) { monitor_fdset_cleanup(mon_fdset); } --- -1.8.3.1 - +-- +2.23.0 diff --git a/msix-add-valid.accepts-methods-to-check-address.patch b/msix-add-valid.accepts-methods-to-check-address.patch new file mode 100644 index 0000000000000000000000000000000000000000..67397549683ec1e44d29c1f858f4154bc3b6a024 --- /dev/null +++ b/msix-add-valid.accepts-methods-to-check-address.patch @@ -0,0 +1,78 @@ +From e9cc24b1737f745b23c408b183dd34fda5abc30c Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Fri, 19 Feb 2021 16:28:00 +0800 +Subject: [PATCH] msix: add valid.accepts methods to check address + +Fix CVE-2020-13754 + +While doing msi-x mmio operations, a guest may send an address +that leads to an OOB access issue. Add valid.accepts methods to +ensure that ensuing mmio r/w operation don't go beyond regions. + +Reported-by: Ren Ding +Reported-by: Hanqing Zhao +Reported-by: Anatoly Trosinenko +Reported-by: Alexander Bulekov +Signed-off-by: Prasad J Pandit + +patch link: https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg00004.html +Signed-off-by: Jiajie Li +--- + hw/pci/msix.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/hw/pci/msix.c b/hw/pci/msix.c +index d39dcf32e8..ec43f16875 100644 +--- a/hw/pci/msix.c ++++ b/hw/pci/msix.c +@@ -192,6 +192,15 @@ static void msix_table_mmio_write(void *opaque, hwaddr addr, + msix_handle_mask_update(dev, vector, was_masked); + } + ++static bool msix_table_accepts(void *opaque, hwaddr addr, unsigned size, ++ bool is_write, MemTxAttrs attrs) ++{ ++ PCIDevice *dev = opaque; ++ uint16_t tbl_size = dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE; ++ ++ return dev->msix_table + addr + 4 <= dev->msix_table + tbl_size; ++} ++ + static const MemoryRegionOps msix_table_mmio_ops = { + .read = msix_table_mmio_read, + .write = msix_table_mmio_write, 
+@@ -199,6 +208,7 @@ static const MemoryRegionOps msix_table_mmio_ops = { + .valid = { + .min_access_size = 4, + .max_access_size = 4, ++ .accepts = msix_table_accepts + }, + }; + +@@ -220,6 +230,15 @@ static void msix_pba_mmio_write(void *opaque, hwaddr addr, + { + } + ++static bool msix_pba_accepts(void *opaque, hwaddr addr, unsigned size, ++ bool is_write, MemTxAttrs attrs) ++{ ++ PCIDevice *dev = opaque; ++ uint16_t pba_size = QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8; ++ ++ return dev->msix_pba + addr + 4 <= dev->msix_pba + pba_size; ++} ++ + static const MemoryRegionOps msix_pba_mmio_ops = { + .read = msix_pba_mmio_read, + .write = msix_pba_mmio_write, +@@ -227,6 +246,7 @@ static const MemoryRegionOps msix_pba_mmio_ops = { + .valid = { + .min_access_size = 4, + .max_access_size = 4, ++ .accepts = msix_pba_accepts + }, + }; + +-- +2.27.0 + diff --git a/multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch b/multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch new file mode 100644 index 0000000000000000000000000000000000000000..ef380e66655d0faf290b1d61072165a64bc0861e --- /dev/null +++ b/multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch @@ -0,0 +1,62 @@ +From 3db288bbddb730960430fb4907e100f19001ca0a Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 14:31:07 +0800 +Subject: [PATCH] multifd: Make sure that we don't do any IO after an error + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +--- + migration/ram.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 3ded38c0be..b74929542d 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -3617,7 +3617,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + { + RAMState **temp = opaque; + RAMState *rs = *temp; +- int ret; ++ int ret = 0; + int i; + int64_t t0; + int done = 0; +@@ -3686,12 +3686,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_ROUND); + + out: +- multifd_send_sync_main(rs); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); +- ram_counters.transferred += 8; ++ if (ret >= 0) { ++ multifd_send_sync_main(rs); ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ram_counters.transferred += 8; + +- ret = qemu_file_get_error(f); ++ ret = qemu_file_get_error(f); ++ } + if (ret < 0) { + return ret; + } +@@ -3745,9 +3747,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + + rcu_read_unlock(); + +- multifd_send_sync_main(rs); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret >= 0) { ++ multifd_send_sync_main(rs); ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ } + + return ret; + } +-- +2.27.0 + diff --git a/multifd-tls-fix-memoryleak-of-the-QIOChannelSocket-o.patch b/multifd-tls-fix-memoryleak-of-the-QIOChannelSocket-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2209ef5776728ced1205b9b1ce3e50daeaa84d3 --- /dev/null +++ b/multifd-tls-fix-memoryleak-of-the-QIOChannelSocket-o.patch @@ -0,0 +1,37 @@ +From a4288f41b3af9f4f73f162b89007c6928509a43c Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 14:51:51 +0800 +Subject: [PATCH] multifd/tls: fix memoryleak of the QIOChannelSocket object + when cancelling migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When creating new 
tls client, the tioc->master will be referenced which results in socket +leaking after multifd_save_cleanup if we cancel migration. +Fix it by calling object_unref() after tls client creation. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Chuan Zheng +Message-Id: <1605104763-118687-1-git-send-email-zhengchuan@huawei.com> +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +--- + migration/ram.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/ram.c b/migration/ram.c +index a37dbfc049..92ce1a53e7 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1246,6 +1246,7 @@ static void multifd_tls_channel_connect(MultiFDSendParams *p, + return; + } + ++ object_unref(OBJECT(ioc)); + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); + p->c = QIO_CHANNEL(tioc); +-- +2.27.0 + diff --git a/nbd-server-Avoid-long-error-message-assertions-CVE-2.patch b/nbd-server-Avoid-long-error-message-assertions-CVE-2.patch new file mode 100644 index 0000000000000000000000000000000000000000..71ce6cabd3a38d5af0701f579babe793b1ea7d07 --- /dev/null +++ b/nbd-server-Avoid-long-error-message-assertions-CVE-2.patch @@ -0,0 +1,152 @@ +From 719292175d391e77487f3c55f5f97a065e44d9f8 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 10 Jun 2020 18:32:01 -0400 +Subject: [PATCH] nbd/server: Avoid long error message assertions + CVE-2020-10761 + +RH-Author: Eric Blake +Message-id: <20200610183202.3780750-2-eblake@redhat.com> +Patchwork-id: 97494 +O-Subject: [RHEL-AV-8.2.1 qemu-kvm PATCH 1/2] nbd/server: Avoid long error message assertions CVE-2020-10761 +Bugzilla: 1845384 +RH-Acked-by: Sergio Lopez Pascual +RH-Acked-by: Max Reitz +RH-Acked-by: Stefan Hajnoczi + +Ever since commit 36683283 (v2.8), the server code asserts that error +strings sent to the client are well-formed per the protocol by not +exceeding the maximum string length of 4096.
At the time the server +first started sending error messages, the assertion could not be +triggered, because messages were completely under our control. +However, over the years, we have added latent scenarios where a client +could trigger the server to attempt an error message that would +include the client's information if it passed other checks first: + +- requesting NBD_OPT_INFO/GO on an export name that is not present + (commit 0cfae925 in v2.12 echoes the name) + +- requesting NBD_OPT_LIST/SET_META_CONTEXT on an export name that is + not present (commit e7b1948d in v2.12 echoes the name) + +At the time, those were still safe because we flagged names larger +than 256 bytes with a different message; but that changed in commit +93676c88 (v4.2) when we raised the name limit to 4096 to match the NBD +string limit. (That commit also failed to change the magic number +4096 in nbd_negotiate_send_rep_err to the just-introduced named +constant.) So with that commit, long client names appended to server +text can now trigger the assertion, and thus be used as a denial of +service attack against a server. As a mitigating factor, if the +server requires TLS, the client cannot trigger the problematic paths +unless it first supplies TLS credentials, and such trusted clients are +less likely to try to intentionally crash the server. + +We may later want to further sanitize the user-supplied strings we +place into our error messages, such as scrubbing out control +characters, but that is less important to the CVE fix, so it can be a +later patch to the new nbd_sanitize_name. + +Consideration was given to changing the assertion in +nbd_negotiate_send_rep_verr to instead merely log a server error and +truncate the message, to avoid leaving a latent path that could +trigger a future CVE DoS on any new error message. 
However, this +merely complicates the code for something that is already (correctly) +flagging coding errors, and now that we are aware of the long message +pitfall, we are less likely to introduce such errors in the future, +which would make such error handling dead code. + +Reported-by: Xueqiang Wei +CC: qemu-stable@nongnu.org +Fixes: https://bugzilla.redhat.com/1843684 CVE-2020-10761 +Fixes: 93676c88d7 +Signed-off-by: Eric Blake +Message-Id: <20200610163741.3745251-2-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 5c4fe018c025740fef4a0a4421e8162db0c3eefd) +Signed-off-by: Eric Blake +Signed-off-by: Eduardo Lima (Etrunko) +--- + nbd/server.c | 21 +++++++++++++++++++-- + tests/qemu-iotests/143 | 4 ++++ + tests/qemu-iotests/143.out | 2 ++ + 3 files changed, 25 insertions(+), 2 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 2d81248967..115e8f06ed 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -229,6 +229,19 @@ out: + return ret; + } + ++/* ++ * Return a malloc'd copy of @name suitable for use in an error reply. ++ */ ++static char * ++nbd_sanitize_name(const char *name) ++{ ++ if (strnlen(name, 80) < 80) { ++ return g_strdup(name); ++ } ++ /* XXX Should we also try to sanitize any control characters? */ ++ return g_strdup_printf("%.80s...", name); ++} ++ + /* Send an error reply. + * Return -errno on error, 0 on success. 
*/ + static int GCC_FMT_ATTR(4, 5) +@@ -584,9 +597,11 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags, + + exp = nbd_export_find(name); + if (!exp) { ++ g_autofree char *sane_name = nbd_sanitize_name(name); ++ + return nbd_negotiate_send_rep_err(client, NBD_REP_ERR_UNKNOWN, + errp, "export '%s' not present", +- name); ++ sane_name); + } + + /* Don't bother sending NBD_INFO_NAME unless client requested it */ +@@ -975,8 +990,10 @@ static int nbd_negotiate_meta_queries(NBDClient *client, + + meta->exp = nbd_export_find(export_name); + if (meta->exp == NULL) { ++ g_autofree char *sane_name = nbd_sanitize_name(export_name); ++ + return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp, +- "export '%s' not present", export_name); ++ "export '%s' not present", sane_name); + } + + ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp); +diff --git a/tests/qemu-iotests/143 b/tests/qemu-iotests/143 +index f649b36195..d2349903b1 100755 +--- a/tests/qemu-iotests/143 ++++ b/tests/qemu-iotests/143 +@@ -58,6 +58,10 @@ _send_qemu_cmd $QEMU_HANDLE \ + $QEMU_IO_PROG -f raw -c quit \ + "nbd+unix:///no_such_export?socket=$SOCK_DIR/nbd" 2>&1 \ + | _filter_qemu_io | _filter_nbd ++# Likewise, with longest possible name permitted in NBD protocol ++$QEMU_IO_PROG -f raw -c quit \ ++ "nbd+unix:///$(printf %4096d 1 | tr ' ' a)?socket=$SOCK_DIR/nbd" 2>&1 \ ++ | _filter_qemu_io | _filter_nbd | sed 's/aaaa*aa/aa--aa/' + + _send_qemu_cmd $QEMU_HANDLE \ + "{ 'execute': 'quit' }" \ +diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out +index 037d34a409..fc7bab3129 100644 +--- a/tests/qemu-iotests/143.out ++++ b/tests/qemu-iotests/143.out +@@ -3,6 +3,8 @@ QA output created by 143 + {"return": {}} + qemu-io: can't open device nbd+unix:///no_such_export?socket=SOCK_DIR/nbd: Requested export not available + server reported: export 'no_such_export' not present ++qemu-io: can't open device nbd+unix:///aa--aa1?socket=SOCK_DIR/nbd: Requested export not 
available ++server reported: export 'aa--aa...' not present + {"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + *** done +-- +2.27.0 + diff --git a/net-l2tpv3-Remove-redundant-check-in-net_init_l2tpv3.patch b/net-l2tpv3-Remove-redundant-check-in-net_init_l2tpv3.patch new file mode 100644 index 0000000000000000000000000000000000000000..a96c178294ea7c2afe5a860f4021efe361a6a18f --- /dev/null +++ b/net-l2tpv3-Remove-redundant-check-in-net_init_l2tpv3.patch @@ -0,0 +1,46 @@ +From cb6048ace290e770b0ec1a6011209192541d3e8a Mon Sep 17 00:00:00 2001 +From: AlexChen +Date: Fri, 30 Oct 2020 10:46:55 +0800 +Subject: [PATCH] net/l2tpv3: Remove redundant check in net_init_l2tpv3() + +The result has been checked to be NULL before, it cannot be NULL here, +so the check is redundant. Remove it. + +Reported-by: Euler Robot +Signed-off-by: AlexChen +Signed-off-by: Jason Wang +(cherry-picked from commit d949fe64b0) +--- + net/l2tpv3.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/net/l2tpv3.c b/net/l2tpv3.c +index 55fea17c0f..e4d4218db6 100644 +--- a/net/l2tpv3.c ++++ b/net/l2tpv3.c +@@ -655,9 +655,8 @@ int net_init_l2tpv3(const Netdev *netdev, + error_setg(errp, "could not bind socket err=%i", errno); + goto outerr; + } +- if (result) { +- freeaddrinfo(result); +- } ++ ++ freeaddrinfo(result); + + memset(&hints, 0, sizeof(hints)); + +@@ -686,9 +685,7 @@ int net_init_l2tpv3(const Netdev *netdev, + memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen); + s->dst_size = result->ai_addrlen; + +- if (result) { +- freeaddrinfo(result); +- } ++ freeaddrinfo(result); + + if (l2tpv3->has_counter && l2tpv3->counter) { + s->has_counter = true; +-- +2.27.0 + diff --git a/net-remove-an-assert-call-in-eth_get_gso_type.patch b/net-remove-an-assert-call-in-eth_get_gso_type.patch new file mode 100644 index 
0000000000000000000000000000000000000000..79d740922a2c2fd766b19c89d68df49a6d0a96f1 --- /dev/null +++ b/net-remove-an-assert-call-in-eth_get_gso_type.patch @@ -0,0 +1,49 @@ +From 9b210ed120ac82e647ed99be3679bab2bc55932b Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Wed, 21 Oct 2020 11:35:50 +0530 +Subject: [PATCH] net: remove an assert call in eth_get_gso_type + +fix CVE-2020-27617 + +eth_get_gso_type() routine returns segmentation offload type based on +L3 protocol type. It calls g_assert_not_reached if L3 protocol is +unknown, making the following return statement unreachable. Remove the +g_assert call, it may be triggered by a guest user. + +Reported-by: Gaoning Pan +Signed-off-by: Prasad J Pandit +Signed-off-by: Jason Wang + +cherry-pick from commit 7564bf7701f00214cdc8a678a9f7df765244def1 +Signed-off-by: Jiajie Li +--- + net/eth.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/eth.c b/net/eth.c +index 0c1d413ee2..1e0821c5f8 100644 +--- a/net/eth.c ++++ b/net/eth.c +@@ -16,6 +16,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/log.h" + #include "net/eth.h" + #include "net/checksum.h" + #include "net/tap.h" +@@ -71,9 +72,8 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) + return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state; + } + } +- +- /* Unsupported offload */ +- g_assert_not_reached(); ++ qemu_log_mask(LOG_UNIMP, "%s: probably not GSO frame, " ++ "unknown L3 protocol: 0x%04"PRIx16"\n", __func__, l3_proto); + + return VIRTIO_NET_HDR_GSO_NONE | ecn_state; + } +-- +2.27.0 + diff --git a/net-vmxnet3-validate-configuration-values-during-act.patch b/net-vmxnet3-validate-configuration-values-during-act.patch new file mode 100644 index 0000000000000000000000000000000000000000..a4ed4bccc5534b5b6f04d61ea7c4bccfc72e3fa8 --- /dev/null +++ b/net-vmxnet3-validate-configuration-values-during-act.patch @@ -0,0 +1,79 @@ +From 18d22b1f2b2f89bbdd77bd4d62e0fe42f19b3962 Mon Sep 17 00:00:00 2001 +From: Prasad J
Pandit +Date: Tue, 9 Mar 2021 17:37:20 +0800 +Subject: [PATCH] net: vmxnet3: validate configuration values during activate + (CVE-2021-20203) + +fix CVE-2021-20203 #I3A34O + +While activating device in vmxnet3_acticate_device(), it does not +validate guest supplied configuration values against predefined +minimum - maximum limits. This may lead to integer overflow or +OOB access issues. Add checks to avoid it. + +Fixes: CVE-2021-20203 +Buglink: https://bugs.launchpad.net/qemu/+bug/1913873 +Reported-by: Gaoning Pan +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/net/vmxnet3.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c +index 10d01d0058..ecc4f5bcf0 100644 +--- a/hw/net/vmxnet3.c ++++ b/hw/net/vmxnet3.c +@@ -1418,6 +1418,7 @@ static void vmxnet3_activate_device(VMXNET3State *s) + vmxnet3_setup_rx_filtering(s); + /* Cache fields from shared memory */ + s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu); ++ assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU); + VMW_CFPRN("MTU is %u", s->mtu); + + s->max_rx_frags = +@@ -1471,7 +1472,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) + /* Read rings memory locations for TX queues */ + pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.txRingBasePA); + size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.txRingSize); +- ++ if (size > VMXNET3_TX_RING_MAX_SIZE) { ++ size = VMXNET3_TX_RING_MAX_SIZE; ++ } + vmxnet3_ring_init(d, &s->txq_descr[i].tx_ring, pa, size, + sizeof(struct Vmxnet3_TxDesc), false); + VMXNET3_RING_DUMP(VMW_CFPRN, "TX", i, &s->txq_descr[i].tx_ring); +@@ -1481,6 +1484,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) + /* TXC ring */ + pa = VMXNET3_READ_TX_QUEUE_DESCR64(d, qdescr_pa, conf.compRingBasePA); + size = VMXNET3_READ_TX_QUEUE_DESCR32(d, qdescr_pa, conf.compRingSize); ++ if (size > VMXNET3_TC_RING_MAX_SIZE) { ++ size = VMXNET3_TC_RING_MAX_SIZE; ++ } + 
vmxnet3_ring_init(d, &s->txq_descr[i].comp_ring, pa, size, + sizeof(struct Vmxnet3_TxCompDesc), true); + VMXNET3_RING_DUMP(VMW_CFPRN, "TXC", i, &s->txq_descr[i].comp_ring); +@@ -1522,6 +1528,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) + /* RX rings */ + pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.rxRingBasePA[j]); + size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.rxRingSize[j]); ++ if (size > VMXNET3_RX_RING_MAX_SIZE) { ++ size = VMXNET3_RX_RING_MAX_SIZE; ++ } + vmxnet3_ring_init(d, &s->rxq_descr[i].rx_ring[j], pa, size, + sizeof(struct Vmxnet3_RxDesc), false); + VMW_CFPRN("RX queue %d:%d: Base: %" PRIx64 ", Size: %d", +@@ -1531,6 +1540,9 @@ static void vmxnet3_activate_device(VMXNET3State *s) + /* RXC ring */ + pa = VMXNET3_READ_RX_QUEUE_DESCR64(d, qd_pa, conf.compRingBasePA); + size = VMXNET3_READ_RX_QUEUE_DESCR32(d, qd_pa, conf.compRingSize); ++ if (size > VMXNET3_RC_RING_MAX_SIZE) { ++ size = VMXNET3_RC_RING_MAX_SIZE; ++ } + vmxnet3_ring_init(d, &s->rxq_descr[i].comp_ring, pa, size, + sizeof(struct Vmxnet3_RxCompDesc), true); + VMW_CFPRN("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size); +-- +2.27.0 + diff --git a/nvram-add-nrf51_soc-flash-read-method.patch b/nvram-add-nrf51_soc-flash-read-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..915bbf8a1e6eecaa0974f14db93847cedded7d85 --- /dev/null +++ b/nvram-add-nrf51_soc-flash-read-method.patch @@ -0,0 +1,44 @@ +From 6f88633406e546eb6a01786b910a2ab12373abf8 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Mar 2021 17:19:15 +0800 +Subject: [PATCH] nvram: add nrf51_soc flash read method + +fix CVE-2020-15469 + +Add nrf51_soc mmio read method to avoid NULL pointer dereference +issue. 
+ +Reported-by: Lei Sun +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/nvram/nrf51_nvm.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/nvram/nrf51_nvm.c b/hw/nvram/nrf51_nvm.c +index eca0cb35b5..7b2b1351f4 100644 +--- a/hw/nvram/nrf51_nvm.c ++++ b/hw/nvram/nrf51_nvm.c +@@ -271,6 +271,10 @@ static const MemoryRegionOps io_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + ++static uint64_t flash_read(void *opaque, hwaddr offset, unsigned size) ++{ ++ g_assert_not_reached(); ++} + + static void flash_write(void *opaque, hwaddr offset, uint64_t value, + unsigned int size) +@@ -298,6 +302,7 @@ static void flash_write(void *opaque, hwaddr offset, uint64_t value, + + + static const MemoryRegionOps flash_ops = { ++ .read = flash_read, + .write = flash_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +-- +2.27.0 + diff --git a/object-return-self-in-object_ref.patch b/object-return-self-in-object_ref.patch new file mode 100644 index 0000000000000000000000000000000000000000..e851fb30d20a4c56b65bb7ce1c6ddb9109c643aa --- /dev/null +++ b/object-return-self-in-object_ref.patch @@ -0,0 +1,58 @@ +From b77ade9bb37b2e9813a42008cb21d0c743aa50a1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 10 Jan 2020 19:30:31 +0400 +Subject: [PATCH] object: return self in object_ref() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This allow for simpler assignment with ref: foo = object_ref(bar) + +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20200110153039.1379601-19-marcandre.lureau@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Zhenyu Ye +--- + include/qom/object.h | 3 ++- + qom/object.c | 5 +++-- + 2 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/include/qom/object.h b/include/qom/object.h +index 5e2f60d4b0..18660fde1c 100644 +--- a/include/qom/object.h ++++ b/include/qom/object.h +@@ 
-1005,8 +1005,9 @@ GSList *object_class_get_list_sorted(const char *implements_type, + * + * Increase the reference count of a object. A object cannot be freed as long + * as its reference count is greater than zero. ++ * Returns: @obj + */ +-void object_ref(Object *obj); ++Object *object_ref(Object *obj); + + /** + * object_unref: +diff --git a/qom/object.c b/qom/object.c +index 66c4a5f1cb..555c8b9d07 100644 +--- a/qom/object.c ++++ b/qom/object.c +@@ -1107,12 +1107,13 @@ GSList *object_class_get_list_sorted(const char *implements_type, + object_class_cmp); + } + +-void object_ref(Object *obj) ++Object *object_ref(Object *obj) + { + if (!obj) { +- return; ++ return NULL; + } + atomic_inc(&obj->ref); ++ return obj; + } + + void object_unref(Object *obj) +-- +2.22.0.windows.1 + diff --git a/pc-Don-t-make-die-id-mandatory-unless-necessary.patch b/pc-Don-t-make-die-id-mandatory-unless-necessary.patch new file mode 100644 index 0000000000000000000000000000000000000000..c51b40f33020e36547f44b895b040acc07bf741c --- /dev/null +++ b/pc-Don-t-make-die-id-mandatory-unless-necessary.patch @@ -0,0 +1,102 @@ +From 7ebcd375ade505358c1c45542de22f188c599bdd Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Fri, 16 Aug 2019 14:07:50 -0300 +Subject: [PATCH] pc: Don't make die-id mandatory unless necessary + +We have this issue reported when using libvirt to hotplug CPUs: +https://bugzilla.redhat.com/show_bug.cgi?id=1741451 + +Basically, libvirt is not copying die-id from +query-hotpluggable-cpus, but die-id is now mandatory. + +We could blame libvirt and say it is not following the documented +interface, because we have this buried in the QAPI schema +documentation: + +> Note: currently there are 5 properties that could be present +> but management should be prepared to pass through other +> properties with device_add command to allow for future +> interface extension. This also requires the filed names to be kept in +> sync with the properties passed to -device/device_add. 
+ +But I don't think this would be reasonable from us. We can just +make QEMU more flexible and let die-id to be omitted when there's +no ambiguity. This will allow us to keep compatibility with +existing libvirt versions. + +Test case included to ensure we don't break this again. + +Fixes: commit 176d2cda0dee ("i386/cpu: Consolidate die-id validity in smp context") +Signed-off-by: Eduardo Habkost +Message-Id: <20190816170750.23910-1-ehabkost@redhat.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit fea374e7c8079563bca7c8fac895c6a880f76adc) +Signed-off-by: Michael Roth +--- + hw/i386/pc.c | 8 ++++++ + tests/acceptance/pc_cpu_hotplug_props.py | 35 ++++++++++++++++++++++++ + 2 files changed, 43 insertions(+) + create mode 100644 tests/acceptance/pc_cpu_hotplug_props.py + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 549c437050..947f81070f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2403,6 +2403,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + int max_socket = (ms->smp.max_cpus - 1) / + smp_threads / smp_cores / pcms->smp_dies; + ++ /* ++ * die-id was optional in QEMU 4.0 and older, so keep it optional ++ * if there's only one die per socket. ++ */ ++ if (cpu->die_id < 0 && pcms->smp_dies == 1) { ++ cpu->die_id = 0; ++ } ++ + if (cpu->socket_id < 0) { + error_setg(errp, "CPU socket-id is not set"); + return; +diff --git a/tests/acceptance/pc_cpu_hotplug_props.py b/tests/acceptance/pc_cpu_hotplug_props.py +new file mode 100644 +index 0000000000..08b7e632c6 +--- /dev/null ++++ b/tests/acceptance/pc_cpu_hotplug_props.py +@@ -0,0 +1,35 @@ ++# ++# Ensure CPU die-id can be omitted on -device ++# ++# Copyright (c) 2019 Red Hat Inc ++# ++# Author: ++# Eduardo Habkost ++# ++# This library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2 of the License, or (at your option) any later version. 
++# ++# This library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, see . ++# ++ ++from avocado_qemu import Test ++ ++class OmittedCPUProps(Test): ++ """ ++ :avocado: tags=arch:x86_64 ++ """ ++ def test_no_die_id(self): ++ self.vm.add_args('-nodefaults', '-S') ++ self.vm.add_args('-smp', '1,sockets=2,cores=2,threads=2,maxcpus=8') ++ self.vm.add_args('-cpu', 'qemu64') ++ self.vm.add_args('-device', 'qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id=0') ++ self.vm.launch() ++ self.assertEquals(len(self.vm.command('query-cpus')), 2) +-- +2.23.0 diff --git a/pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch b/pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e29f08d69c56a2907892008c9c4ce177778c097 --- /dev/null +++ b/pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch @@ -0,0 +1,34 @@ +From d2bb5b4c4ed3b1dbc0096deb195b6df33f813f23 Mon Sep 17 00:00:00 2001 +From: Yifan Luo +Date: Wed, 14 Aug 2019 14:14:26 +0800 +Subject: [PATCH 5/5] pc-bios/s390-ccw/net: fix a possible memory leak in + get_uuid() + +There is a possible memory leak in get_uuid(). Should free allocated mem +before +return NULL. 
+ +Signed-off-by: Yifan Luo +Message-Id: <02cf01d55267$86cf2850$946d78f0$@cmss.chinamobile.com> +Reviewed-by: Thomas Huth +Reviewed-by: Cornelia Huck +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/netmain.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/pc-bios/s390-ccw/netmain.c b/pc-bios/s390-ccw/netmain.c +index f3542cb2..f2dcc01e 100644 +--- a/pc-bios/s390-ccw/netmain.c ++++ b/pc-bios/s390-ccw/netmain.c +@@ -269,6 +269,7 @@ static const char *get_uuid(void) + : "d" (r0), "d" (r1), [addr] "a" (buf) + : "cc", "memory"); + if (cc) { ++ free(mem); + return NULL; + } + +-- +2.23.0 + diff --git a/pci-Add-return_page_response-pci-ops.patch b/pci-Add-return_page_response-pci-ops.patch new file mode 100644 index 0000000000000000000000000000000000000000..133762085ba7a4a01fd5e909a3c17cd45a975978 --- /dev/null +++ b/pci-Add-return_page_response-pci-ops.patch @@ -0,0 +1,86 @@ +From e3b498a1afec138693251bf1bd1fa9b322a880fb Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 Nov 2020 14:34:35 +0100 +Subject: [PATCH] pci: Add return_page_response pci ops + +Add a new PCI operation that allows to return page responses +to registered VFIO devices + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/pci/pci.c | 16 ++++++++++++++++ + include/hw/iommu/iommu.h | 8 ++++++++ + include/hw/pci/pci.h | 4 ++++ + 3 files changed, 28 insertions(+) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index f11ca7964e..a8b3d1c071 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2660,6 +2660,22 @@ int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, + return -ENOENT; + } + ++int pci_device_return_page_response(PCIBus *bus, int32_t devfn, ++ IOMMUPageResponse *resp) ++{ ++ PCIDevice *dev; ++ ++ if (!bus) { ++ return -EINVAL; ++ } ++ ++ dev = bus->devices[devfn]; ++ if (dev && dev->pasid_ops && dev->pasid_ops->return_page_response) { ++ return dev->pasid_ops->return_page_response(bus, devfn, resp); ++ } ++ return -ENOENT; ++} ++ + static void 
pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque) + { + Range *range = opaque; +diff --git a/include/hw/iommu/iommu.h b/include/hw/iommu/iommu.h +index 12092bda7b..5890f095b1 100644 +--- a/include/hw/iommu/iommu.h ++++ b/include/hw/iommu/iommu.h +@@ -24,5 +24,13 @@ typedef struct IOMMUConfig { + }; + } IOMMUConfig; + ++typedef struct IOMMUPageResponse { ++ union { ++#ifdef __linux__ ++ struct iommu_page_response resp; ++#endif ++ }; ++} IOMMUPageResponse; ++ + + #endif /* QEMU_HW_IOMMU_IOMMU_H */ +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index bb14ed61b0..5e7e0e4e6f 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -266,6 +266,8 @@ typedef struct PCIReqIDCache PCIReqIDCache; + + struct PCIPASIDOps { + int (*set_pasid_table)(PCIBus *bus, int32_t devfn, IOMMUConfig *config); ++ int (*return_page_response)(PCIBus *bus, int32_t devfn, ++ IOMMUPageResponse *resp); + }; + typedef struct PCIPASIDOps PCIPASIDOps; + +@@ -495,6 +497,8 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque); + void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops); + bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn); + int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, IOMMUConfig *config); ++int pci_device_return_page_response(PCIBus *bus, int32_t devfn, ++ IOMMUPageResponse *resp); + + static inline void + pci_set_byte(uint8_t *config, uint8_t val) +-- +2.27.0 + diff --git a/pci-check-bus-pointer-before-dereference.patch b/pci-check-bus-pointer-before-dereference.patch new file mode 100644 index 0000000000000000000000000000000000000000..540caaf945f28b4e13e53a299495528cb0703b5d --- /dev/null +++ b/pci-check-bus-pointer-before-dereference.patch @@ -0,0 +1,50 @@ +From e393095e6d1456e2fb22f3cde3a9f0a307152562 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Wed, 14 Oct 2020 15:00:20 +0800 +Subject: [PATCH] pci: check bus pointer before dereference + +fix CVE-2020-25742 + +patch link: 
https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg05294.html + +While mapping IRQ level in pci_change_irq_level() routine, +it does not check if pci_get_bus() returned a valid pointer. +It may lead to a NULL pointer dereference issue. Add check to +avoid it. + + -> https://ruhr-uni-bochum.sciebo.de/s/NNWP2GfwzYKeKwE?path=%2Flsi_nullptr1 + ==1183858==Hint: address points to the zero page. + #0 pci_change_irq_level hw/pci/pci.c:259 + #1 pci_irq_handler hw/pci/pci.c:1445 + #2 pci_set_irq hw/pci/pci.c:1463 + #3 lsi_set_irq hw/scsi/lsi53c895a.c:488 + #4 lsi_update_irq hw/scsi/lsi53c895a.c:523 + #5 lsi_script_scsi_interrupt hw/scsi/lsi53c895a.c:554 + #6 lsi_execute_script hw/scsi/lsi53c895a.c:1149 + #7 lsi_reg_writeb hw/scsi/lsi53c895a.c:1984 + #8 lsi_io_write hw/scsi/lsi53c895a.c:2146 + ... + +Reported-by: Ruhr-University +Signed-off-by: Prasad J Pandit +--- + hw/pci/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index de0fae10ab..df5a2c3294 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -253,6 +253,9 @@ static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) + PCIBus *bus; + for (;;) { + bus = pci_get_bus(pci_dev); ++ if (!bus) { ++ return; ++ } + irq_num = bus->map_irq(pci_dev, irq_num); + if (bus->set_irq) + break; +-- +2.23.0 + diff --git a/pci-host-add-pcie-msi-read-method.patch b/pci-host-add-pcie-msi-read-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..7433fb0c3ce55f68593f3d612663c4db3a9960cd --- /dev/null +++ b/pci-host-add-pcie-msi-read-method.patch @@ -0,0 +1,56 @@ +From dd86dc83fcccc0d1773bd93c509e3a03e7ef9b38 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Mar 2021 17:08:24 +0800 +Subject: [PATCH] pci-host: add pcie-msi read method + +fix CVE-2020-15469 + +Add pcie-msi mmio read method to avoid NULL pointer dereference +issue. 
+ +Reported-by: Lei Sun +Reviewed-by: Li Qiang +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/pci-host/designware.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c +index 9ae8c0deb7..23e3de3cad 100644 +--- a/hw/pci-host/designware.c ++++ b/hw/pci-host/designware.c +@@ -21,6 +21,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "qemu/module.h" ++#include "qemu/log.h" + #include "hw/pci/msi.h" + #include "hw/pci/pci_bridge.h" + #include "hw/pci/pci_host.h" +@@ -60,6 +61,13 @@ designware_pcie_root_to_host(DesignwarePCIERoot *root) + return DESIGNWARE_PCIE_HOST(bus->parent); + } + ++static uint64_t designware_pcie_root_msi_read(void *opaque, hwaddr addr, ++ unsigned size) ++{ ++ qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); ++ return 0; ++} ++ + static void designware_pcie_root_msi_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) + { +@@ -74,6 +82,7 @@ static void designware_pcie_root_msi_write(void *opaque, hwaddr addr, + } + + static const MemoryRegionOps designware_pci_host_msi_ops = { ++ .read = designware_pcie_root_msi_read, + .write = designware_pcie_root_msi_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { +-- +2.27.0 + diff --git a/pci-introduce-PCIPASIDOps-to-PCIDevice.patch b/pci-introduce-PCIPASIDOps-to-PCIDevice.patch new file mode 100644 index 0000000000000000000000000000000000000000..e89cdc8df7130e348c916988d8b85f39ec4b6d19 --- /dev/null +++ b/pci-introduce-PCIPASIDOps-to-PCIDevice.patch @@ -0,0 +1,127 @@ +From 26adddfe4645b69c16ed8d6601f373d40bddd0e3 Mon Sep 17 00:00:00 2001 +From: Liu Yi L +Date: Fri, 5 Jul 2019 19:01:36 +0800 +Subject: [PATCH] pci: introduce PCIPASIDOps to PCIDevice + +This patch introduces PCIPASIDOps for IOMMU related operations. 
+ +https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg00078.html +https://lists.gnu.org/archive/html/qemu-devel/2018-03/msg00940.html + +So far, to setup virt-SVA for assigned SVA capable device, needs to +configure host translation structures for specific pasid. (e.g. bind +guest page table to host and enable nested translation in host). +Besides, vIOMMU emulator needs to forward guest's cache invalidation +to host since host nested translation is enabled. e.g. on VT-d, guest +owns 1st level translation table, thus cache invalidation for 1st +level should be propagated to host. + +This patch adds two functions: alloc_pasid and free_pasid to support +guest pasid allocation and free. The implementations of the callbacks +would be device passthru modules. Like vfio. + +Cc: Kevin Tian +Cc: Jacob Pan +Cc: Peter Xu +Cc: Eric Auger +Cc: Yi Sun +Cc: David Gibson +Signed-off-by: Liu Yi L +Signed-off-by: Yi Sun +Signed-off-by: Kunkun Jiang +--- + hw/pci/pci.c | 34 ++++++++++++++++++++++++++++++++++ + include/hw/pci/pci.h | 11 +++++++++++ + 2 files changed, 45 insertions(+) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index e74143ccc3..f11ca7964e 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2626,6 +2626,40 @@ void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque) + bus->iommu_opaque = opaque; + } + ++void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops) ++{ ++ assert(ops && !dev->pasid_ops); ++ dev->pasid_ops = ops; ++} ++ ++bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn) ++{ ++ PCIDevice *dev; ++ ++ if (!bus) { ++ return false; ++ } ++ ++ dev = bus->devices[devfn]; ++ return !!(dev && dev->pasid_ops); ++} ++ ++int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, ++ IOMMUConfig *config) ++{ ++ PCIDevice *dev; ++ ++ if (!bus) { ++ return -EINVAL; ++ } ++ ++ dev = bus->devices[devfn]; ++ if (dev && dev->pasid_ops && dev->pasid_ops->set_pasid_table) { ++ return dev->pasid_ops->set_pasid_table(bus, devfn, config); ++ } ++ return 
-ENOENT; ++} ++ + static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque) + { + Range *range = opaque; +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index aaf1b9f70d..bb14ed61b0 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -9,6 +9,7 @@ + #include "hw/isa/isa.h" + + #include "hw/pci/pcie.h" ++#include "hw/iommu/iommu.h" + + extern bool pci_available; + +@@ -263,6 +264,11 @@ struct PCIReqIDCache { + }; + typedef struct PCIReqIDCache PCIReqIDCache; + ++struct PCIPASIDOps { ++ int (*set_pasid_table)(PCIBus *bus, int32_t devfn, IOMMUConfig *config); ++}; ++typedef struct PCIPASIDOps PCIPASIDOps; ++ + struct PCIDevice { + DeviceState qdev; + +@@ -352,6 +358,7 @@ struct PCIDevice { + MSIVectorUseNotifier msix_vector_use_notifier; + MSIVectorReleaseNotifier msix_vector_release_notifier; + MSIVectorPollNotifier msix_vector_poll_notifier; ++ PCIPASIDOps *pasid_ops; + }; + + void pci_register_bar(PCIDevice *pci_dev, int region_num, +@@ -485,6 +492,10 @@ typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int); + AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); + void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque); + ++void pci_setup_pasid_ops(PCIDevice *dev, PCIPASIDOps *ops); ++bool pci_device_is_pasid_ops_set(PCIBus *bus, int32_t devfn); ++int pci_device_set_pasid_table(PCIBus *bus, int32_t devfn, IOMMUConfig *config); ++ + static inline void + pci_set_byte(uint8_t *config, uint8_t val) + { +-- +2.27.0 + diff --git a/pr-manager-Fix-invalid-g_free-crash-bug.patch b/pr-manager-Fix-invalid-g_free-crash-bug.patch new file mode 100644 index 0000000000000000000000000000000000000000..b171cdb5ae34dab7135926c2250541814d543a02 --- /dev/null +++ b/pr-manager-Fix-invalid-g_free-crash-bug.patch @@ -0,0 +1,39 @@ +From 57fdf4a13ff16d9d48a43f02a5e7b42e3d264f83 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Thu, 22 Aug 2019 15:38:46 +0200 +Subject: [PATCH] pr-manager: Fix invalid g_free() crash bug 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +pr_manager_worker() passes its @opaque argument to g_free(). Wrong; +it points to pr_manager_worker()'s automatic @data. Broken when +commit 2f3a7ab39be converted @data from heap- to stack-allocated. Fix +by deleting the g_free(). + +Fixes: 2f3a7ab39bec4ba8022dc4d42ea641165b004e3e +Cc: qemu-stable@nongnu.org +Signed-off-by: Markus Armbruster +Reviewed-by: Philippe Mathieu-Daudé +Acked-by: Paolo Bonzini +Signed-off-by: Kevin Wolf +(cherry picked from commit 6b9d62c2a9e83bbad73fb61406f0ff69b46ff6f3) +Signed-off-by: Michael Roth +--- + scsi/pr-manager.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/scsi/pr-manager.c b/scsi/pr-manager.c +index ee43663576..0c866e8698 100644 +--- a/scsi/pr-manager.c ++++ b/scsi/pr-manager.c +@@ -39,7 +39,6 @@ static int pr_manager_worker(void *opaque) + int fd = data->fd; + int r; + +- g_free(data); + trace_pr_manager_run(fd, hdr->cmdp[0], hdr->cmdp[1]); + + /* The reference was taken in pr_manager_execute. */ +-- +2.23.0 diff --git a/prep-add-ppc-parity-write-method.patch b/prep-add-ppc-parity-write-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..fbc3dcc8cf024a5f26ce66f73ff653b5c74837dd --- /dev/null +++ b/prep-add-ppc-parity-write-method.patch @@ -0,0 +1,50 @@ +From f4eed258b1b8b434927fbc9a18bbcb52d3f55ce6 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Mar 2021 17:16:14 +0800 +Subject: [PATCH] prep: add ppc-parity write method + +fix CVE-2020-15469 + +Add ppc-parity mmio write method to avoid NULL pointer dereference +issue. 
+ +Reported-by: Lei Sun +Acked-by: David Gibson +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/ppc/prep_systemio.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/ppc/prep_systemio.c b/hw/ppc/prep_systemio.c +index df7603b986..67244ed48c 100644 +--- a/hw/ppc/prep_systemio.c ++++ b/hw/ppc/prep_systemio.c +@@ -23,6 +23,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/log.h" + #include "hw/isa/isa.h" + #include "exec/address-spaces.h" + #include "qemu/error-report.h" /* for error_report() */ +@@ -232,8 +233,15 @@ static uint64_t ppc_parity_error_readl(void *opaque, hwaddr addr, + return val; + } + ++static void ppc_parity_error_writel(void *opaque, hwaddr addr, ++ uint64_t data, unsigned size) ++{ ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid write access\n", __func__); ++} ++ + static const MemoryRegionOps ppc_parity_error_ops = { + .read = ppc_parity_error_readl, ++ .write = ppc_parity_error_writel, + .valid = { + .min_access_size = 4, + .max_access_size = 4, +-- +2.27.0 + diff --git a/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch b/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ee078c19f392be5b53214f0f03dcea3ecc216fa --- /dev/null +++ b/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch @@ -0,0 +1,214 @@ +From f97eaa27e2fb6b985f090af9acaa780bb6a2ee5b Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:27 +0530 +Subject: [PATCH] qapi: Add VFIO devices migration stats in Migration stats + +Added amount of bytes transferred to the VM at destination by all VFIO +devices + +Signed-off-by: Kirti Wankhede +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 19 +++++++++++++++++++ + hw/vfio/migration.c | 9 +++++++++ + include/hw/vfio/vfio-common.h | 3 +++ + migration/migration.c | 17 +++++++++++++++++ + monitor/hmp-cmds.c | 6 ++++++ + qapi/migration.json | 17 +++++++++++++++++ + 6 files changed, 71 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4ce1c10734..a86a4c4506 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -291,6 +291,25 @@ const MemoryRegionOps vfio_region_ops = { + * Device state interfaces + */ + ++bool vfio_mig_active(void) ++{ ++ VFIOGroup *group; ++ VFIODevice *vbasedev; ++ ++ if (QLIST_EMPTY(&vfio_group_list)) { ++ return false; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev, &group->device_list, next) { ++ if (vbasedev->migration_blocker) { ++ return false; ++ } ++ } ++ } ++ return true; ++} ++ + static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) + { + VFIOGroup *group; +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 0bdf6a1820..b77c66557e 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -45,6 +45,8 @@ + #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) + #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) + ++static int64_t bytes_transferred; ++ + static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, + off_t off, bool iswrite) + { +@@ -255,6 +257,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) + *size = data_size; + } + ++ bytes_transferred += data_size; + return ret; + } + +@@ -785,6 +788,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) + case MIGRATION_STATUS_CANCELLING: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_FAILED: ++ bytes_transferred = 0; + ret = vfio_migration_set_state(vbasedev, + ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), + 
VFIO_DEVICE_STATE_RUNNING); +@@ -866,6 +870,11 @@ err: + + /* ---------------------------------------------------------------------- */ + ++int64_t vfio_mig_bytes_transferred(void) ++{ ++ return bytes_transferred; ++} ++ + int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + { + VFIOContainer *container = vbasedev->group->container; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 8fd0212264..048731e81f 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -203,6 +203,9 @@ extern const MemoryRegionOps vfio_region_ops; + typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + extern VFIOGroupList vfio_group_list; + ++bool vfio_mig_active(void); ++int64_t vfio_mig_bytes_transferred(void); ++ + #ifdef CONFIG_LINUX + int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info); +diff --git a/migration/migration.c b/migration/migration.c +index b0b9430822..9faf5f63a6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -49,6 +49,10 @@ + #include "monitor/monitor.h" + #include "net/announce.h" + ++#ifdef CONFIG_VFIO ++#include "hw/vfio/vfio-common.h" ++#endif ++ + #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ + + /* Amount of time to allocate to each "chunk" of bandwidth-throttled +@@ -908,6 +912,17 @@ static void populate_disk_info(MigrationInfo *info) + } + } + ++static void populate_vfio_info(MigrationInfo *info) ++{ ++#ifdef CONFIG_VFIO ++ if (vfio_mig_active()) { ++ info->has_vfio = true; ++ info->vfio = g_malloc0(sizeof(*info->vfio)); ++ info->vfio->transferred = vfio_mig_bytes_transferred(); ++ } ++#endif ++} ++ + static void fill_source_migration_info(MigrationInfo *info) + { + MigrationState *s = migrate_get_current(); +@@ -941,6 +956,7 @@ static void fill_source_migration_info(MigrationInfo *info) + + populate_ram_info(info, s); + populate_disk_info(info); ++ populate_vfio_info(info); + break; + case 
MIGRATION_STATUS_COLO: + info->has_status = true; +@@ -956,6 +972,7 @@ static void fill_source_migration_info(MigrationInfo *info) + info->setup_time = s->setup_time; + + populate_ram_info(info, s); ++ populate_vfio_info(info); + break; + case MIGRATION_STATUS_FAILED: + info->has_status = true; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index e5a7a88ba2..cecaae0a47 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -370,6 +370,12 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + } + monitor_printf(mon, "]\n"); + } ++ ++ if (info->has_vfio) { ++ monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n", ++ info->vfio->transferred >> 10); ++ } ++ + qapi_free_MigrationInfo(info); + qapi_free_MigrationCapabilityStatusList(caps); + } +diff --git a/qapi/migration.json b/qapi/migration.json +index 587ef65872..1f0eb19ac6 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -141,6 +141,18 @@ + 'active', 'postcopy-active', 'postcopy-paused', + 'postcopy-recover', 'completed', 'failed', 'colo', + 'pre-switchover', 'device' ] } ++## ++# @VfioStats: ++# ++# Detailed VFIO devices migration statistics ++# ++# @transferred: amount of bytes transferred to the target VM by VFIO devices ++# ++# Since: 5.2 ++# ++## ++{ 'struct': 'VfioStats', ++ 'data': {'transferred': 'int' } } + + ## + # @MigrationInfo: +@@ -202,11 +214,16 @@ + # + # @socket-address: Only used for tcp, to know what the real port is (Since 4.0) + # ++# @vfio: @VfioStats containing detailed VFIO devices migration statistics, ++# only returned if VFIO device is present, migration is supported by all ++# VFIO devices and status is 'active' or 'completed' (since 5.2) ++# + # Since: 0.14.0 + ## + { 'struct': 'MigrationInfo', + 'data': {'*status': 'MigrationStatus', '*ram': 'MigrationStats', + '*disk': 'MigrationStats', ++ '*vfio': 'VfioStats', + '*xbzrle-cache': 'XBZRLECacheStats', + '*total-time': 'int', + '*expected-downtime': 'int', +-- +2.27.0 + diff --git 
a/qapi-add-BitmapSyncMode-enum.patch b/qapi-add-BitmapSyncMode-enum.patch new file mode 100644 index 0000000000000000000000000000000000000000..778faeee0dc98c233e2415190a8941ed6bd137db --- /dev/null +++ b/qapi-add-BitmapSyncMode-enum.patch @@ -0,0 +1,54 @@ +From bd1d5d79f4629520d0753676cea8129c60fc6bbc Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 16:35:52 -0400 +Subject: [PATCH] qapi: add BitmapSyncMode enum + +Depending on what a user is trying to accomplish, there might be a few +bitmap cleanup actions that occur when an operation is finished that +could be useful. + +I am proposing three: +- NEVER: The bitmap is never synchronized against what was copied. +- ALWAYS: The bitmap is always synchronized, even on failures. +- ON-SUCCESS: The bitmap is synchronized only on success. + +The existing incremental backup modes use 'on-success' semantics, +so add just that one for right now. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +Reviewed-by: Markus Armbruster +Message-id: 20190709232550.10724-5-jsnow@redhat.com +Signed-off-by: John Snow +--- + qapi/block-core.json | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 37aa1b7b9a..b8d12a4951 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1134,6 +1134,20 @@ + { 'enum': 'MirrorSyncMode', + 'data': ['top', 'full', 'none', 'incremental'] } + ++## ++# @BitmapSyncMode: ++# ++# An enumeration of possible behaviors for the synchronization of a bitmap ++# when used for data copy operations. ++# ++# @on-success: The bitmap is only synced when the operation is successful. ++# This is the behavior always used for 'INCREMENTAL' backups. 
++# ++# Since: 4.2 ++## ++{ 'enum': 'BitmapSyncMode', ++ 'data': ['on-success'] } ++ + ## + # @MirrorCopyMode: + # +-- +2.27.0 + diff --git a/qapi-block-core-Add-retry-option-for-error-action.patch b/qapi-block-core-Add-retry-option-for-error-action.patch new file mode 100644 index 0000000000000000000000000000000000000000..817ff7051db6ab8b9a36d8d53427495c628a2a89 --- /dev/null +++ b/qapi-block-core-Add-retry-option-for-error-action.patch @@ -0,0 +1,52 @@ +From 9a95d75bdd469c9c7d44c7c72bc16d57ef2f65cc Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:45 +0800 +Subject: [PATCH] qapi/block-core: Add retry option for error action + +Add a new error action 'retry' to support retry on errors. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + blockdev.c | 2 ++ + qapi/block-core.json | 4 ++-- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 4d141e9a1f..0f49fd290e 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -319,6 +319,8 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) + return BLOCKDEV_ON_ERROR_STOP; + } else if (!strcmp(buf, "report")) { + return BLOCKDEV_ON_ERROR_REPORT; ++ } else if (!strcmp(buf, "retry")) { ++ return BLOCKDEV_ON_ERROR_RETRY; + } else { + error_setg(errp, "'%s' invalid %s error action", + buf, is_read ? 
"read" : "write"); +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 0d43d4f37c..db24f0dfe5 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1113,7 +1113,7 @@ + # Since: 1.3 + ## + { 'enum': 'BlockdevOnError', +- 'data': ['report', 'ignore', 'enospc', 'stop', 'auto'] } ++ 'data': ['report', 'ignore', 'enospc', 'stop', 'auto', 'retry'] } + + ## + # @MirrorSyncMode: +@@ -4894,7 +4894,7 @@ + # Since: 2.1 + ## + { 'enum': 'BlockErrorAction', +- 'data': [ 'ignore', 'report', 'stop' ] } ++ 'data': [ 'ignore', 'report', 'stop', 'retry' ] } + + + ## +-- +2.27.0 + diff --git a/qapi-block-core-Introduce-BackupCommon.patch b/qapi-block-core-Introduce-BackupCommon.patch new file mode 100644 index 0000000000000000000000000000000000000000..2d160748c79371b993fd4f0c82a48b535d7b29cd --- /dev/null +++ b/qapi-block-core-Introduce-BackupCommon.patch @@ -0,0 +1,171 @@ +From 2204b4839fb90658e13ddc608df7b35ed1ea9fd0 Mon Sep 17 00:00:00 2001 +From: John Snow +Date: Mon, 29 Jul 2019 16:35:52 -0400 +Subject: [PATCH] qapi/block-core: Introduce BackupCommon + +drive-backup and blockdev-backup have an awful lot of things in common +that are the same. Let's fix that. + +I don't deduplicate 'target', because the semantics actually did change +between each structure. Leave that one alone so it can be documented +separately. + +Where documentation was not identical, use the most up-to-date version. +For "speed", use Blockdev-Backup's version. For "sync", use +Drive-Backup's version. + +Signed-off-by: John Snow +Reviewed-by: Max Reitz +[Maintainer edit: modified commit message. 
--js] +Reviewed-by: Markus Armbruster +Message-id: 20190709232550.10724-2-jsnow@redhat.com +Signed-off-by: John Snow +--- + qapi/block-core.json | 95 ++++++++++++++------------------------------ + 1 file changed, 29 insertions(+), 66 deletions(-) + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index db24f0dfe5..37aa1b7b9a 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1315,32 +1315,23 @@ + 'data': { 'node': 'str', 'overlay': 'str' } } + + ## +-# @DriveBackup: ++# @BackupCommon: + # + # @job-id: identifier for the newly-created block job. If + # omitted, the device name will be used. (Since 2.7) + # + # @device: the device name or node-name of a root node which should be copied. + # +-# @target: the target of the new image. If the file exists, or if it +-# is a device, the existing file/device will be used as the new +-# destination. If it does not exist, a new file will be created. +-# +-# @format: the format of the new destination, default is to +-# probe if @mode is 'existing', else the format of the source +-# + # @sync: what parts of the disk image should be copied to the destination + # (all the disk, only the sectors allocated in the topmost image, from a + # dirty bitmap, or only new I/O). + # +-# @mode: whether and how QEMU should create a new image, default is +-# 'absolute-paths'. +-# +-# @speed: the maximum speed, in bytes per second ++# @speed: the maximum speed, in bytes per second. The default is 0, ++# for unlimited. + # + # @bitmap: the name of dirty bitmap if sync is "incremental". + # Must be present if sync is "incremental", must NOT be present +-# otherwise. (Since 2.4) ++# otherwise. (Since 2.4 (drive-backup), 3.1 (blockdev-backup)) + # + # @compress: true to compress data, if the target format supports it. + # (default: false) (since 2.8) +@@ -1370,75 +1361,47 @@ + # I/O. If an error occurs during a guest write request, the device's + # rerror/werror actions will be used. 
+ # +-# Since: 1.6 ++# Since: 4.2 + ## +-{ 'struct': 'DriveBackup', +- 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', +- '*format': 'str', 'sync': 'MirrorSyncMode', +- '*mode': 'NewImageMode', '*speed': 'int', ++{ 'struct': 'BackupCommon', ++ 'data': { '*job-id': 'str', 'device': 'str', ++ 'sync': 'MirrorSyncMode', '*speed': 'int', + '*bitmap': 'str', '*compress': 'bool', + '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError', + '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } + + ## +-# @BlockdevBackup: +-# +-# @job-id: identifier for the newly-created block job. If +-# omitted, the device name will be used. (Since 2.7) +-# +-# @device: the device name or node-name of a root node which should be copied. +-# +-# @target: the device name or node-name of the backup target node. +-# +-# @sync: what parts of the disk image should be copied to the destination +-# (all the disk, only the sectors allocated in the topmost image, or +-# only new I/O). +-# +-# @speed: the maximum speed, in bytes per second. The default is 0, +-# for unlimited. +-# +-# @bitmap: the name of dirty bitmap if sync is "incremental". +-# Must be present if sync is "incremental", must NOT be present +-# otherwise. (Since 3.1) +-# +-# @compress: true to compress data, if the target format supports it. +-# (default: false) (since 2.8) ++# @DriveBackup: + # +-# @on-source-error: the action to take on an error on the source, +-# default 'report'. 'stop' and 'enospc' can only be used +-# if the block device supports io-status (see BlockInfo). ++# @target: the target of the new image. If the file exists, or if it ++# is a device, the existing file/device will be used as the new ++# destination. If it does not exist, a new file will be created. + # +-# @on-target-error: the action to take on an error on the target, +-# default 'report' (no limitations, since this applies to +-# a different block device than @device). 
++# @format: the format of the new destination, default is to ++# probe if @mode is 'existing', else the format of the source + # +-# @auto-finalize: When false, this job will wait in a PENDING state after it has +-# finished its work, waiting for @block-job-finalize before +-# making any block graph changes. +-# When true, this job will automatically +-# perform its abort or commit actions. +-# Defaults to true. (Since 2.12) ++# @mode: whether and how QEMU should create a new image, default is ++# 'absolute-paths'. + # +-# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it +-# has completely ceased all work, and awaits @block-job-dismiss. +-# When true, this job will automatically disappear from the query +-# list without user intervention. +-# Defaults to true. (Since 2.12) ++# Since: 1.6 ++## ++{ 'struct': 'DriveBackup', ++ 'base': 'BackupCommon', ++ 'data': { 'target': 'str', ++ '*format': 'str', ++ '*mode': 'NewImageMode' } } ++ ++## ++# @BlockdevBackup: + # +-# Note: @on-source-error and @on-target-error only affect background +-# I/O. If an error occurs during a guest write request, the device's +-# rerror/werror actions will be used. ++# @target: the device name or node-name of the backup target node. 
+ # + # Since: 2.3 + ## + { 'struct': 'BlockdevBackup', +- 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', +- 'sync': 'MirrorSyncMode', '*speed': 'int', +- '*bitmap': 'str', '*compress': 'bool', +- '*on-source-error': 'BlockdevOnError', +- '*on-target-error': 'BlockdevOnError', +- '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } ++ 'base': 'BackupCommon', ++ 'data': { 'target': 'str' } } + + ## + # @blockdev-snapshot-sync: +-- +2.27.0 + diff --git a/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch b/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2a4e5c26f50c820eb9122e8a8449b76713a8db7 --- /dev/null +++ b/qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch @@ -0,0 +1,35 @@ +From 405deba14f6b61b9c557484b46e863308c8cf373 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 28 Oct 2019 17:18:40 +0100 +Subject: [PATCH] qcow2: Fix QCOW2_COMPRESSED_SECTOR_MASK + +Masks for L2 table entries should have 64 bit. + +Fixes: b6c246942b14d3e0dec46a6c5868ed84e7dbea19 +Buglink: https://bugs.launchpad.net/qemu/+bug/1850000 +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Message-id: 20191028161841.1198-2-mreitz@redhat.com +Reviewed-by: Alberto Garcia +Signed-off-by: Max Reitz +(cherry picked from commit 24552feb6ae2f615b76c2b95394af43901f75046) +Signed-off-by: Michael Roth +--- + block/qcow2.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/qcow2.h b/block/qcow2.h +index fc1b0d3c1e..359197f89f 100644 +--- a/block/qcow2.h ++++ b/block/qcow2.h +@@ -77,7 +77,7 @@ + + /* Defined in the qcow2 spec (compressed cluster descriptor) */ + #define QCOW2_COMPRESSED_SECTOR_SIZE 512U +-#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1)) ++#define QCOW2_COMPRESSED_SECTOR_MASK (~(QCOW2_COMPRESSED_SECTOR_SIZE - 1ULL)) + + /* Must be at least 2 to cover COW */ + #define MIN_L2_CACHE_SIZE 2 /* cache entries */ +-- +2.23.0 diff --git 
a/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch b/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch new file mode 100644 index 0000000000000000000000000000000000000000..b4c25806d7f7b99408a0419987c22c2175f4fee3 --- /dev/null +++ b/qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch @@ -0,0 +1,71 @@ +From 416a692e51b8b582407e30046ddcffbbe52ecf77 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 24 Oct 2019 16:26:58 +0200 +Subject: [PATCH] qcow2: Fix corruption bug in + qcow2_detect_metadata_preallocation() + +qcow2_detect_metadata_preallocation() calls qcow2_get_refcount() which +requires s->lock to be taken to protect its accesses to the refcount +table and refcount blocks. However, nothing in this code path actually +took the lock. This could cause the same cache entry to be used by two +requests at the same time, for different tables at different offsets, +resulting in image corruption. + +As it would be preferable to base the detection on consistent data (even +though it's just heuristics), let's take the lock not only around the +qcow2_get_refcount() calls, but around the whole function. + +This patch takes the lock in qcow2_co_block_status() earlier and asserts +in qcow2_detect_metadata_preallocation() that we hold the lock. 
+ +Fixes: 69f47505ee66afaa513305de0c1895a224e52c45 +Cc: qemu-stable@nongnu.org +Reported-by: Michael Weiser +Signed-off-by: Kevin Wolf +Tested-by: Michael Weiser +Reviewed-by: Michael Weiser +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +(cherry picked from commit 5e9785505210e2477e590e61b1ab100d0ec22b01) +Signed-off-by: Michael Roth +--- + block/qcow2-refcount.c | 2 ++ + block/qcow2.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index ef965d7895..0d64bf5a5e 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -3455,6 +3455,8 @@ int qcow2_detect_metadata_preallocation(BlockDriverState *bs) + int64_t i, end_cluster, cluster_count = 0, threshold; + int64_t file_length, real_allocation, real_clusters; + ++ qemu_co_mutex_assert_locked(&s->lock); ++ + file_length = bdrv_getlength(bs->file->bs); + if (file_length < 0) { + return file_length; +diff --git a/block/qcow2.c b/block/qcow2.c +index 865839682c..c0f5439dc8 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1899,6 +1899,8 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, + unsigned int bytes; + int status = 0; + ++ qemu_co_mutex_lock(&s->lock); ++ + if (!s->metadata_preallocation_checked) { + ret = qcow2_detect_metadata_preallocation(bs); + s->metadata_preallocation = (ret == 1); +@@ -1906,7 +1908,6 @@ static int coroutine_fn qcow2_co_block_status(BlockDriverState *bs, + } + + bytes = MIN(INT_MAX, count); +- qemu_co_mutex_lock(&s->lock); + ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { +-- +2.23.0 diff --git a/qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch b/qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch new file mode 100644 index 0000000000000000000000000000000000000000..8d9b71c70ea503d0bc2480439d5826357e5ab931 --- /dev/null +++ 
b/qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch @@ -0,0 +1,39 @@ +From fad649b88c93d0567be4e426f23063b439037095 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 11 Feb 2020 10:48:59 +0100 +Subject: [PATCH] qcow2: Fix qcow2_alloc_cluster_abort() for external data file + +For external data file, cluster allocations return an offset in the data +file and are not refcounted. In this case, there is nothing to do for +qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file +is wrong and causes crashes in the better case or image corruption in +the worse case. + +Signed-off-by: Kevin Wolf +Message-Id: <20200211094900.17315-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +--- + block/qcow2-cluster.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c +index f8576031b6..7e7e051437 100644 +--- a/block/qcow2-cluster.c ++++ b/block/qcow2-cluster.c +@@ -1026,8 +1026,11 @@ err: + void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) + { + BDRVQcow2State *s = bs->opaque; +- qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, +- QCOW2_DISCARD_NEVER); ++ if (!has_data_file(bs)) { ++ qcow2_free_clusters(bs, m->alloc_offset, ++ m->nb_clusters << s->cluster_bits, ++ QCOW2_DISCARD_NEVER); ++ } + } + + /* +-- +2.27.0 + diff --git a/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch b/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch new file mode 100644 index 0000000000000000000000000000000000000000..be2c3c72ced8b33f569fabef0c1f01dd382993ef --- /dev/null +++ b/qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch @@ -0,0 +1,58 @@ +From c9ffb12754b1575babfef45168b6e1b1af80a95f Mon Sep 17 00:00:00 2001 +From: Alberto Garcia +Date: Fri, 16 Aug 2019 15:17:42 +0300 +Subject: [PATCH] qcow2: Fix the calculation of the maximum L2 cache size + +The size of the qcow2 L2 cache defaults to 32 MB, which can be easily +larger than the 
maximum amount of L2 metadata that the image can have. +For example: with 64 KB clusters the user would need a qcow2 image +with a virtual size of 256 GB in order to have 32 MB of L2 metadata. + +Because of that, since commit b749562d9822d14ef69c9eaa5f85903010b86c30 +we forbid the L2 cache to become larger than the maximum amount of L2 +metadata for the image, calculated using this formula: + + uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); + +The problem with this formula is that the result should be rounded up +to the cluster size because an L2 table on disk always takes one full +cluster. + +For example, a 1280 MB qcow2 image with 64 KB clusters needs exactly +160 KB of L2 metadata, but we need 192 KB on disk (3 clusters) even if +the last 32 KB of those are not going to be used. + +However QEMU rounds the numbers down and only creates 2 cache tables +(128 KB), which is not enough for the image. + +A quick test doing 4KB random writes on a 1280 MB image gives me +around 500 IOPS, while with the correct cache size I get 16K IOPS. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Alberto Garcia +Signed-off-by: Kevin Wolf +(cherry picked from commit b70d08205b2e4044c529eefc21df2c8ab61b473b) +Signed-off-by: Michael Roth +--- + block/qcow2.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 039bdc2f7e..865839682c 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -826,7 +826,11 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, + bool l2_cache_entry_size_set; + int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size; + uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; +- uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8); ++ uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size); ++ /* An L2 table is always one cluster in size so the max cache size ++ * should be a multiple of the cluster size. 
*/ ++ uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t), ++ s->cluster_size); + + combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE); + l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE); +-- +2.23.0 diff --git a/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch b/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch new file mode 100644 index 0000000000000000000000000000000000000000..85467e8412ec264d6034f59ae3704a3042d1e5e0 --- /dev/null +++ b/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch @@ -0,0 +1,30 @@ +From 4f1396f9e173a24f78204b8849c209100499d639 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Thu, 29 Jul 2021 15:24:48 +0800 +Subject: [PATCH] qdev/monitors: Fix reundant error_setg of qdev_add_device + +There is an extra log "error_setg" in qdev_add_device(). When +hot-plug a device, if the corresponding bus doesn't exist, it +will trigger an asseration "assert(*errp == NULL)". + +Fixes: 515a7970490 (log: Add some logs on VM runtime path) +Signed-off-by: Kunkun Jiang +--- + qdev-monitor.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/qdev-monitor.c b/qdev-monitor.c +index c6c1d3f06a..ab2bdef105 100644 +--- a/qdev-monitor.c ++++ b/qdev-monitor.c +@@ -587,7 +587,6 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { +- error_setg(errp, "can not find bus for %s", driver); + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { +-- +2.27.0 + diff --git a/qemu-4.0.1.tar.xz b/qemu-4.1.0.tar.xz similarity index 79% rename from qemu-4.0.1.tar.xz rename to qemu-4.1.0.tar.xz index 703a6c2ff7a99ef107bc55570fc0d52b5e79f0a9..79ad0661eda38092de13a677ef70eeaece3ad848 100644 Binary files a/qemu-4.0.1.tar.xz and b/qemu-4.1.0.tar.xz differ diff --git a/qemu-bridge-helper-move-repeating-code-in-parse_acl.patch b/qemu-bridge-helper-move-repeating-code-in-parse_acl.patch deleted file mode 100644 index 
8cd599a13bf7a4e4e2b45ab014cd05809051e4f7..0000000000000000000000000000000000000000 --- a/qemu-bridge-helper-move-repeating-code-in-parse_acl.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 3283dde4b5b5cce0f96f48d536bebff66d97ce0b Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 23 Jul 2019 16:17:53 +0530 -Subject: [PATCH 2/2] qemu-bridge-helper: move repeating code in parse_acl_file -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Move repeating error handling sequence in parse_acl_file routine -to an 'err' label. - -This patch fixes CVE-2019-13164. - -Signed-off-by: Prasad J Pandit -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Daniel P. Berrangé -Reviewed-by: Li Qiang -Signed-off-by: Jason Wang -(cherry-picked from commit 3283dde4b5b5cce0f96f48d536bebff66d97ce0b) ---- - qemu-bridge-helper.c | 19 +++++++++---------- - 1 file changed, 9 insertions(+), 10 deletions(-) - -diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c -index 2058e10454..3d50ec094c 100644 ---- a/qemu-bridge-helper.c -+++ b/qemu-bridge-helper.c -@@ -102,9 +102,7 @@ static int parse_acl_file(const char *filename, ACLList *acl_list) - - if (arg == NULL) { - fprintf(stderr, "Invalid config line:\n %s\n", line); -- fclose(f); -- errno = EINVAL; -- return -1; -+ goto err; - } - - *arg = 0; -@@ -121,9 +119,7 @@ static int parse_acl_file(const char *filename, ACLList *acl_list) - - if (!g_str_equal(cmd, "include") && strlen(arg) >= IFNAMSIZ) { - fprintf(stderr, "name `%s' too long: %zu\n", arg, strlen(arg)); -- fclose(f); -- errno = EINVAL; -- return -1; -+ goto err; - } - - if (strcmp(cmd, "deny") == 0) { -@@ -149,15 +145,18 @@ static int parse_acl_file(const char *filename, ACLList *acl_list) - parse_acl_file(arg, acl_list); - } else { - fprintf(stderr, "Unknown command `%s'\n", cmd); -- fclose(f); -- errno = EINVAL; -- return -1; -+ goto err; - } - } - - fclose(f); -- - return 0; -+ -+err: -+ fclose(f); -+ errno = EINVAL; -+ return -1; -+ - } - - 
static bool has_vnet_hdr(int fd) --- -2.19.1 - diff --git a/qemu-bridge-helper-restrict-interface-name-to-IFNAMS.patch b/qemu-bridge-helper-restrict-interface-name-to-IFNAMS.patch deleted file mode 100644 index b6dc25e4dfdc9e204c78c545ad5582c52c07dc26..0000000000000000000000000000000000000000 --- a/qemu-bridge-helper-restrict-interface-name-to-IFNAMS.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 6f5d8671225dc77190647f18a27a0d156d4ca97a Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Tue, 23 Jul 2019 16:17:52 +0530 -Subject: [PATCH 1/2] qemu-bridge-helper: restrict interface name to IFNAMSIZ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The network interface name in Linux is defined to be of size -IFNAMSIZ(=16), including the terminating null('\0') byte. -The same is applied to interface names read from 'bridge.conf' -file to form ACL rules. If user supplied '--br=bridge' name -is not restricted to the same length, it could lead to ACL bypass -issue. Restrict interface name to IFNAMSIZ, including null byte. - -This patch fixes CVE-2019-13164. - -Reported-by: Riccardo Schirone -Signed-off-by: Prasad J Pandit -Reviewed-by: Stefan Hajnoczi -Reviewed-by: Daniel P. 
Berrangé -Reviewed-by: Li Qiang -Signed-off-by: Jason Wang -(cherry-picked from commit 6f5d8671225dc77190647f18a27a0d156d4ca97a) ---- - qemu-bridge-helper.c | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/qemu-bridge-helper.c b/qemu-bridge-helper.c -index 95624bc300..2058e10454 100644 ---- a/qemu-bridge-helper.c -+++ b/qemu-bridge-helper.c -@@ -119,6 +119,13 @@ static int parse_acl_file(const char *filename, ACLList *acl_list) - } - *argend = 0; - -+ if (!g_str_equal(cmd, "include") && strlen(arg) >= IFNAMSIZ) { -+ fprintf(stderr, "name `%s' too long: %zu\n", arg, strlen(arg)); -+ fclose(f); -+ errno = EINVAL; -+ return -1; -+ } -+ - if (strcmp(cmd, "deny") == 0) { - acl_rule = g_malloc(sizeof(*acl_rule)); - if (strcmp(arg, "all") == 0) { -@@ -269,6 +276,10 @@ int main(int argc, char **argv) - usage(); - return EXIT_FAILURE; - } -+ if (strlen(bridge) >= IFNAMSIZ) { -+ fprintf(stderr, "name `%s' too long: %zu\n", bridge, strlen(bridge)); -+ return EXIT_FAILURE; -+ } - - /* parse default acl file */ - QSIMPLEQ_INIT(&acl_list); --- -2.19.1 - diff --git a/qemu-file-Don-t-do-IO-after-shutdown.patch b/qemu-file-Don-t-do-IO-after-shutdown.patch new file mode 100644 index 0000000000000000000000000000000000000000..72cfc4d7c97cc130be5ffb5556852790c17b2afa --- /dev/null +++ b/qemu-file-Don-t-do-IO-after-shutdown.patch @@ -0,0 +1,81 @@ +From 1f8bc46e8af4ffe6d062f378bd11e0ad70d30ac8 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 2 Dec 2020 14:25:13 +0800 +Subject: [PATCH] qemu-file: Don't do IO after shutdown + +Be sure that we are not doing neither read/write after shutdown of the +QEMUFile. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +--- + migration/qemu-file.c | 22 +++++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 18f480529a..cd96d04e9a 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -51,6 +51,8 @@ struct QEMUFile { + unsigned int iovcnt; + + int last_error; ++ /* has the file has been shutdown */ ++ bool shutdown; + }; + + /* +@@ -59,10 +61,18 @@ struct QEMUFile { + */ + int qemu_file_shutdown(QEMUFile *f) + { ++ int ret; ++ ++ f->shutdown = true; + if (!f->ops->shut_down) { + return -ENOSYS; + } +- return f->ops->shut_down(f->opaque, true, true); ++ ++ ret = f->ops->shut_down(f->opaque, true, true); ++ if (!f->last_error) { ++ qemu_file_set_error(f, -EIO); ++ } ++ return ret; + } + + /* +@@ -181,6 +191,10 @@ void qemu_fflush(QEMUFile *f) + return; + } + ++ if (f->shutdown) { ++ return; ++ } ++ + if (f->iovcnt > 0) { + expect = iov_size(f->iov, f->iovcnt); + ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, f->pos); +@@ -293,6 +307,9 @@ static ssize_t qemu_fill_buffer(QEMUFile *f) + f->buf_index = 0; + f->buf_size = pending; + ++ if (f->shutdown) { ++ return 0; ++ } + len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, + IO_BUF_SIZE - pending); + if (len > 0) { +@@ -591,6 +608,9 @@ int64_t qemu_ftell(QEMUFile *f) + + int qemu_file_rate_limit(QEMUFile *f) + { ++ if (f->shutdown) { ++ return 1; ++ } + if (qemu_file_get_error(f)) { + return 1; + } +-- +2.27.0 + diff --git a/qemu-img-convert-Don-t-pre-zero-images.patch b/qemu-img-convert-Don-t-pre-zero-images.patch new file mode 100644 index 0000000000000000000000000000000000000000..925590c34903cd73307b3f806a0b407c6c744fb5 --- /dev/null +++ b/qemu-img-convert-Don-t-pre-zero-images.patch @@ -0,0 +1,73 @@ +From a2fcbe2b82c42f890a857ad8d4edcfdb273106ea Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 31 Jul 2020 08:18:31 -0400 +Subject: [PATCH] qemu-img convert: Don't pre-zero images + 
+RH-Author: Kevin Wolf +Message-id: <20200731081831.13781-2-kwolf@redhat.com> +Patchwork-id: 98117 +O-Subject: [RHEL-AV-8.2.1.z qemu-kvm PATCH 1/1] qemu-img convert: Don't pre-zero images +Bugzilla: 1861682 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Max Reitz +RH-Acked-by: Eric Blake + +Since commit 5a37b60a61c, qemu-img create will pre-zero the target image +if it isn't already zero-initialised (most importantly, for host block +devices, but also iscsi etc.), so that writing explicit zeros wouldn't +be necessary later. + +This could speed up the operation significantly, in particular when the +source image file was only sparsely populated. However, it also means +that some block are written twice: Once when pre-zeroing them, and then +when they are overwritten with actual data. On a full image, the +pre-zeroing is wasted work because everything will be overwritten. + +In practice, write_zeroes typically turns out faster than writing +explicit zero buffers, but slow enough that first zeroing everything and +then overwriting parts can be a significant net loss. + +Meanwhile, qemu-img convert was rewritten in 690c7301600 and zero blocks +are now written to the target using bdrv_co_pwrite_zeroes() if the +target could be pre-zeroed. This way we already make use of the faster +write_zeroes operation, but avoid writing any blocks twice. + +Remove the pre-zeroing because these days this former optimisation has +actually turned into a pessimisation in the common case. + +Reported-by: Nir Soffer +Signed-off-by: Kevin Wolf +Message-Id: <20200622151203.35624-1-kwolf@redhat.com> +Tested-by: Nir Soffer +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit edafc70c0c8510862f2f213a3acf7067113bcd08) +Signed-off-by: Kevin Wolf +Signed-off-by: Danilo C. L. 
de Paula +--- + qemu-img.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index 2e9cc5db7c..e4abd4978a 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -1981,15 +1981,6 @@ static int convert_do_copy(ImgConvertState *s) + ? bdrv_has_zero_init(blk_bs(s->target)) + : false; + +- if (!s->has_zero_init && !s->target_has_backing && +- bdrv_can_write_zeroes_with_unmap(blk_bs(s->target))) +- { +- ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK); +- if (ret == 0) { +- s->has_zero_init = true; +- } +- } +- + /* Allocate buffer for copied data. For compressed images, only one cluster + * can be copied at a time. */ + if (s->compressed) { +-- +2.27.0 + diff --git a/qemu-img-free-memory-before-re-assign.patch b/qemu-img-free-memory-before-re-assign.patch new file mode 100644 index 0000000000000000000000000000000000000000..2d46d64b1b9664b66efc76ea6490a1bc22663137 --- /dev/null +++ b/qemu-img-free-memory-before-re-assign.patch @@ -0,0 +1,33 @@ +From d22af5cb41c16829dbf3ed3c611ef56ceeb840ff Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Thu, 27 Feb 2020 09:29:50 +0800 +Subject: [PATCH 02/14] qemu-img: free memory before re-assign + +collect_image_check() is called twice in img_check(), the filename/format will be alloced without free the original memory. +It is not a big deal since the process will exit anyway, but seems like a clean code and it will remove the warning spotted by asan. 
+ +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Message-Id: <20200227012950.12256-3-pannengyuan@huawei.com> +Signed-off-by: Max Reitz +Signed-off-by: Peng Liang +--- + qemu-img.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index 79983772de39..2e9cc5db7c4c 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -808,6 +808,8 @@ static int img_check(int argc, char **argv) + check->corruptions_fixed); + } + ++ qapi_free_ImageCheck(check); ++ check = g_new0(ImageCheck, 1); + ret = collect_image_check(bs, check, filename, fmt, 0); + + check->leaks_fixed = leaks_fixed; +-- +2.26.2 + diff --git a/qemu.spec b/qemu.spec index eb3cc8989b070c63b09c4a33f53e6f507c59dc65..0e3d75dc1a3a4cbf816f71d0deb6f4be35efdd36 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,68 +1,574 @@ Name: qemu -Version: 4.0.1 -Release: 11 +Version: 4.1.0 +Release: 84 Epoch: 2 Summary: QEMU is a generic and open source machine emulator and virtualizer -License: GPLv2 and BSD and MIT and CC-BY +License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 URL: http://www.qemu.org Source0: https://www.qemu.org/download/%{name}-%{version}%{?rcstr}.tar.xz Source1: 80-kvm.rules Source2: 99-qemu-guest-agent.rules Source3: bridge.conf -Patch0001: qxl-check-release-info-object.patch -Patch0002: ARM64-record-vtimer-tick-when-cpu-is-stopped.patch -Patch0003: pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch -Patch0004: pl031-support-rtc-timer-property-for-pl031.patch -Patch0005: vhost-cancel-migration-when-vhost-user-restarted.patch -Patch0006: qcow2-fix-memory-leak-in-qcow2_read_extensions.patch -Patch0007: hw-arm-expose-host-CPU-frequency-info-to-guest.patch -Patch0008: qemu-bridge-helper-restrict-interface-name-to-IFNAMS.patch -Patch0009: qemu-bridge-helper-move-repeating-code-in-parse_acl.patch -Patch0010: smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch -Patch0011: hw-arm-virt-Introduce-cpu-topology-support.patch -Patch0012: hw-arm64-add-vcpu-cache-info-support.patch 
-Patch0013: xhci-Fix-memory-leak-in-xhci_address_slot.patch -Patch0014: xhci-Fix-memory-leak-in-xhci_kick_epctx.patch -Patch0015: ehci-fix-queue-dev-null-ptr-dereference.patch -Patch0016: memory-unref-the-memory-region-in-simplify-flatview.patch -Patch0017: util-async-hold-AioContext-ref-to-prevent-use-after-free.patch -Patch0018: vhost-user-scsi-prevent-using-uninitialized-vqs.patch -Patch0019: cpu-add-Kunpeng-920-cpu-support.patch -Patch0020: cpu-parse-feature-to-avoid-failure.patch -Patch0021: cpu-add-Cortex-A72-processor-kvm-target-support.patch -Patch0022: vnc-fix-memory-leak-when-vnc-disconnect.patch -Patch0023: pcie-disable-the-PCI_EXP_LINKSTA_DLLA-cap.patch -Patch0024: linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch -Patch0025: intc-arm_gic-Support-IRQ-injection-for-more-than-256.patch -Patch0026: ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp_.patch -Patch0027: 9pfs-local-Fix-possible-memory-leak-in-local_link.patch -Patch0028: scsi-disk-define-props-in-scsi_block_disk-to-avoid-memleaks.patch -Patch0029: arm-translate-a64-fix-uninitialized-variable-warning.patch -Patch0030: nbd-fix-uninitialized-variable-warning.patch -Patch0031: xhci-Fix-memory-leak-in-xhci_kick_epctx-when-poweroff.patch -Patch0032: block-fix-memleaks-in-bdrv_refresh_filename.patch -Patch0033: iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch -Patch0034: tcp_emu-Fix-oob-access.patch -Patch0035: slirp-use-correct-size-while-emulating-IRC-commands.patch -Patch0036: slirp-use-correct-size-while-emulating-commands.patch -Patch0037: tcp_emu-fix-unsafe-snprintf-usages.patch -Patch0038: block-iscsi-use-MIN-between-mx_sb_len-and-sb_len_wr.patch -Patch0039: monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch +Patch0001: pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch +Patch0002: pl031-support-rtc-timer-property-for-pl031.patch +Patch0003: vhost-cancel-migration-when-vhost-user-restarted.patch +Patch0004: qcow2-fix-memory-leak-in-qcow2_read_extensions.patch 
+Patch0005: bios-tables-test-prepare-to-change-ARM-virt-ACPI-DSDT.patch +Patch0006: hw-arm-expose-host-CPU-frequency-info-to-guest.patch +Patch0007: smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch +Patch0008: tests-bios-tables-test-disable-this-testcase.patch +Patch0009: hw-arm-virt-Introduce-cpu-topology-support.patch +Patch0010: hw-arm64-add-vcpu-cache-info-support.patch +Patch0011: xhci-Fix-memory-leak-in-xhci_address_slot.patch +Patch0012: xhci-Fix-memory-leak-in-xhci_kick_epctx.patch +Patch0013: ehci-fix-queue-dev-null-ptr-dereference.patch +Patch0014: util-async-hold-AioContext-ref-to-prevent-use-after-free.patch +Patch0015: vhost-user-scsi-prevent-using-uninitialized-vqs.patch +Patch0016: cpu-add-Kunpeng-920-cpu-support.patch +Patch0017: cpu-parse-feature-to-avoid-failure.patch +Patch0018: cpu-add-Cortex-A72-processor-kvm-target-support.patch +Patch0019: pcie-disable-the-PCI_EXP_LINKSTA_DLLA-cap.patch +Patch0020: vnc-fix-memory-leak-when-vnc-disconnect.patch +Patch0021: linux-headers-update-against-KVM-ARM-Fix-256-vcpus.patch +Patch0022: intc-arm_gic-Support-IRQ-injection-for-more-than-256.patch +Patch0023: ARM-KVM-Check-KVM_CAP_ARM_IRQ_LINE_LAYOUT_2-for-smp.patch +Patch0024: 9pfs-local-Fix-possible-memory-leak-in-local_link.patch +Patch0025: scsi-disk-define-props-in-scsi_block_disk-to-avoid-memleaks.patch +Patch0026: arm-translate-a64-fix-uninitialized-variable-warning.patch +Patch0027: nbd-fix-uninitialized-variable-warning.patch +Patch0028: xhci-Fix-memory-leak-in-xhci_kick_epctx-when-poweroff.patch +Patch0029: block-fix-memleaks-in-bdrv_refresh_filename.patch +Patch0030: iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch +Patch0031: tcp_emu-Fix-oob-access.patch +Patch0032: slirp-use-correct-size-while-emulating-IRC-commands.patch +Patch0033: slirp-use-correct-size-while-emulating-commands.patch +Patch0034: util-add-slirp_fmt-helpers.patch +Patch0035: tcp_emu-fix-unsafe-snprintf-usages.patch +Patch0036: 
block-iscsi-use-MIN-between-mx_sb_len-and-sb_len_wr.patch +Patch0037: monitor-fix-memory-leak-in-monitor_fdset_dup_fd_find.patch +Patch0038: memory-Align-MemoryRegionSections-fields.patch +Patch0039: memory-Provide-an-equality-function-for-MemoryRegion.patch Patch0040: vhost-Fix-memory-region-section-comparison.patch -Patch0041: memory-Align-MemoryRegionSections-fields.patch -Patch0042: memory-Provide-an-equality-function-for-MemoryRegion.patch -Patch0043: file-posix-Handle-undetectable-alignment.patch -Patch0044: block-backup-fix-max_transfer-handling-for-copy_rang.patch -Patch0045: block-backup-fix-backup_cow_with_offload-for-last-cl.patch -Patch0046: qcow2-Limit-total-allocation-range-to-INT_MAX.patch -Patch0047: mirror-Do-not-dereference-invalid-pointers.patch -Patch0048: COLO-compare-Fix-incorrect-if-logic.patch -Patch0049: qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch -Patch0050: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch -Patch0051: pcie-Compat-with-devices-which-do-not-support-Link-W.patch +Patch0041: file-posix-Handle-undetectable-alignment.patch +Patch0042: block-backup-fix-max_transfer-handling-for-copy_rang.patch +Patch0043: block-backup-fix-backup_cow_with_offload-for-last-cl.patch +Patch0044: qcow2-Limit-total-allocation-range-to-INT_MAX.patch +Patch0045: mirror-Do-not-dereference-invalid-pointers.patch +Patch0046: COLO-compare-Fix-incorrect-if-logic.patch +Patch0047: qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch +Patch0048: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch +Patch0049: pcie-Compat-with-devices-which-do-not-support-Link-W.patch +Patch0050: aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch +Patch0051: async-use-explicit-memory-barriers.patch +Patch0052: dma-helpers-ensure-AIO-callback-is-invoked-after-can.patch +Patch0053: Revert-ide-ahci-Check-for-ECANCELED-in-aio-callbacks.patch +Patch0054: pc-Don-t-make-die-id-mandatory-unless-necessary.patch +Patch0055: 
block-file-posix-Reduce-xfsctl-use.patch +Patch0056: pr-manager-Fix-invalid-g_free-crash-bug.patch +Patch0057: x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch +Patch0058: vpc-Return-0-from-vpc_co_create-on-success.patch +Patch0059: target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch +Patch0060: target-arm-Don-t-abort-on-M-profile-exception-return.patch +Patch0061: libvhost-user-fix-SLAVE_SEND_FD-handling.patch +Patch0062: qcow2-Fix-the-calculation-of-the-maximum-L2-cache-si.patch +Patch0063: block-nfs-tear-down-aio-before-nfs_close.patch +Patch0064: blockjob-update-nodes-head-while-removing-all-bdrv.patch +Patch0065: block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch +Patch0066: coroutine-Add-qemu_co_mutex_assert_locked.patch +Patch0067: qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch +Patch0068: hw-arm-boot.c-Set-NSACR.-CP11-CP10-for-NS-kernel-boo.patch +Patch0069: make-release-pull-in-edk2-submodules-so-we-can-build.patch +Patch0070: roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch +Patch0071: block-snapshot-Restrict-set-of-snapshot-nodes.patch +Patch0072: vhost-user-save-features-if-the-char-dev-is-closed.patch +Patch0073: hw-core-loader-Fix-possible-crash-in-rom_copy.patch +Patch0074: ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch +Patch0075: virtio-new-post_load-hook.patch +Patch0076: virtio-net-prevent-offloads-reset-on-migration.patch +Patch0077: util-hbitmap-strict-hbitmap_reset.patch +Patch0078: hbitmap-handle-set-reset-with-zero-length.patch +Patch0079: target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch +Patch0080: scsi-lsi-exit-infinite-loop-while-executing-script-C.patch +Patch0081: virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch +Patch0082: qcow2-Fix-QCOW2_COMPRESSED_SECTOR_MASK.patch +Patch0083: util-iov-introduce-qemu_iovec_init_extended.patch +Patch0084: util-iov-improve-qemu_iovec_is_zero.patch +Patch0085: block-io-refactor-padding.patch +Patch0086: 
block-Make-wait-mark-serialising-requests-public.patch +Patch0087: block-Add-bdrv_co_get_self_request.patch +Patch0088: block-file-posix-Let-post-EOF-fallocate-serialize.patch +Patch0089: block-posix-Always-allocate-the-first-block.patch +Patch0090: block-create-Do-not-abort-if-a-block-driver-is-not-a.patch +Patch0091: mirror-Keep-mirror_top_bs-drained-after-dropping-per.patch +Patch0092: target-arm-kvm-trivial-Clean-up-header-documentation.patch +Patch0093: target-arm-kvm64-kvm64-cpus-have-timer-registers.patch +Patch0094: target-arm-kvm-Implement-virtual-time-adjustment.patch +Patch0095: target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch +Patch0096: hw-acpi-Make-ACPI-IO-address-space-configurable.patch +Patch0097: hw-acpi-Do-not-create-memory-hotplug-method-when-han.patch +Patch0098: hw-acpi-Add-ACPI-Generic-Event-Device-Support.patch +Patch0099: hw-arm-virt-Add-memory-hotplug-framework.patch +Patch0100: hw-arm-virt-Enable-device-memory-cold-hot-plug-with-.patch +Patch0101: hw-arm-virt-acpi-build-Add-PC-DIMM-in-SRAT.patch +Patch0102: hw-arm-Factor-out-powerdown-notifier-from-GPIO.patch +Patch0103: hw-arm-Use-GED-for-system_powerdown-event.patch +Patch0104: docs-specs-Add-ACPI-GED-documentation.patch +Patch0105: tests-Update-ACPI-tables-list-for-upcoming-arm-virt-.patch +Patch0106: tests-acpi-add-empty-files.patch +Patch0107: tests-allow-empty-expected-files.patch +Patch0108: tests-Add-bios-tests-to-arm-virt.patch +Patch0109: tests-document-how-to-update-acpi-tables.patch +Patch0110: hw-arm-virt-Simplify-by-moving-the-gic-in-the-machin.patch +Patch0111: bugfix-Use-gicr_typer-in-arm_gicv3_icc_reset.patch +Patch0112: Typo-Correct-the-name-of-CPU-hotplug-memory-region.patch +Patch0113: acpi-madt-Factor-out-the-building-of-MADT-GICC-struc.patch +Patch0114: acpi-ged-Add-virt_madt_cpu_entry-to-madt_cpu-hook.patch +Patch0115: arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch +Patch0116: acpi-cpu-Prepare-build_cpus_aml-for-arm-virt.patch +Patch0117: 
acpi-ged-Extend-ACPI-GED-to-support-CPU-hotplug.patch +Patch0118: arm-cpu-assign-arm_get_arch_id-handler-to-get_arch_i.patch +Patch0119: arm-virt-Attach-ACPI-CPU-hotplug-support-to-virt.patch +Patch0120: arm-virt-Add-CPU-hotplug-framework.patch +Patch0121: arm-virt-Add-CPU-topology-support.patch +Patch0122: test-numa-Adjust-aarch64-numa-test.patch +Patch0123: hw-arm-virt-Factor-out-some-CPU-init-codes-to-pre_pl.patch +Patch0124: hw-arm-boot-Add-manually-register-and-trigger-of-CPU.patch +Patch0125: arm-virt-gic-Construct-irqs-connection-from-create_g.patch +Patch0126: intc-gicv3_common-Factor-out-arm_gicv3_common_cpu_re.patch +Patch0127: intc-gicv3_cpuif-Factor-out-gicv3_init_one_cpuif.patch +Patch0128: intc-kvm_gicv3-Factor-out-kvm_arm_gicv3_cpu_realize.patch +Patch0129: hw-intc-gicv3-Add-CPU-hotplug-realize-hook.patch +Patch0130: accel-kvm-Add-pre-park-vCPU-support.patch +Patch0131: intc-gicv3-Add-pre-sizing-capability-to-GICv3.patch +Patch0132: acpi-madt-Add-pre-sizing-capability-to-MADT-GICC-str.patch +Patch0133: arm-virt-Add-cpu_hotplug_enabled-field.patch +Patch0134: arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch +Patch0135: arm-virt-Pre-sizing-MADT-GICC-PPTT-GICv3-and-Pre-par.patch +Patch0136: arm-virt-Add-some-sanity-checks-in-cpu_pre_plug-hook.patch +Patch0137: arm-virt-Start-up-CPU-hot-plug.patch +Patch0138: migration-always-initialise-ram_counters-for-a-new-m.patch +Patch0139: migration-add-qemu_file_update_transfer-interface.patch +Patch0140: migration-add-speed-limit-for-multifd-migration.patch +Patch0141: migration-update-ram_counters-for-multifd-sync-packe.patch +Patch0142: migration-Make-global-sem_sync-semaphore-by-channel.patch +Patch0143: migration-multifd-fix-nullptr-access-in-terminating-m.patch +Patch0144: migration-Maybe-VM-is-paused-when-migration-is-cance.patch +Patch0145: migration-multifd-fix-potential-wrong-acception-orde.patch +Patch0146: migration-multifd-fix-destroyed-mutex-access-in-term.patch +Patch0147: 
migration-multifd-fix-nullptr-access-in-multifd_send.patch +Patch0148: vtimer-compat-cross-version-migration-from-v4.0.1.patch +Patch0149: migration-ram-Do-error_free-after-migrate_set_error-.patch +Patch0150: migration-ram-fix-memleaks-in-multifd_new_send_chann.patch +Patch0151: migration-rdma-fix-a-memleak-on-error-path-in-rdma_s.patch +Patch0152: arm-virt-Support-CPU-cold-plug.patch +Patch0153: ide-Fix-incorrect-handling-of-some-PRDTs-in-ide_dma_.patch +Patch0154: ati-vga-Fix-checks-in-ati_2d_blt-to-avoid-crash.patch +Patch0155: slirp-tftp-restrict-relative-path-access.patch +Patch0156: ip_reass-Fix-use-after-free.patch +Patch0157: bt-use-size_t-type-for-length-parameters-instead-of-.patch +Patch0158: log-Add-some-logs-on-VM-runtime-path.patch +Patch0159: Revert-vtimer-compat-cross-version-migration-from-v4.patch +Patch0160: ARM64-record-vtimer-tick-when-cpu-is-stopped.patch +Patch0161: hw-arm-virt-add-missing-compat-for-kvm-no-adjvtime.patch +Patch0162: migration-Compat-virtual-timer-adjust-for-v4.0.1-and.patch +Patch0163: vtimer-Drop-vtimer-virtual-timer-adjust.patch +Patch0164: target-arm-Add-the-kvm_adjvtime-vcpu-property-for-Co.patch +Patch0165: target-arm-Fix-PAuth-sbox-functions.patch +Patch0166: tests-Disalbe-filemonitor-testcase.patch +Patch0167: es1370-check-total-frame-count-against-current-frame.patch +Patch0168: exec-set-map-length-to-zero-when-returning-NULL.patch +Patch0169: ati-vga-check-mm_index-before-recursive-call-CVE-202.patch +Patch0170: megasas-use-unsigned-type-for-reply_queue_head-and-c.patch +Patch0171: megasas-avoid-NULL-pointer-dereference.patch +Patch0172: megasas-use-unsigned-type-for-positive-numeric-field.patch +Patch0173: hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch +Patch0174: hw-arm-acpi-enable-SHPC-native-hot-plug.patch +Patch0175: hw-tpm-rename-Error-parameter-to-more-common-errp.patch +Patch0176: tpm-ppi-page-align-PPI-RAM.patch +Patch0177: tpm-Move-tpm_tis_show_buffer-to-tpm_util.c.patch +Patch0178: 
spapr-Implement-get_dt_compatible-callback.patch +Patch0179: delete-the-in-tpm.txt.patch +Patch0180: tpm_spapr-Support-TPM-for-ppc64-using-CRQ-based-inte.patch +Patch0181: tpm_spapr-Support-suspend-and-resume.patch +Patch0182: hw-ppc-Kconfig-Enable-TPM_SPAPR-as-part-of-PSERIES-c.patch +Patch0183: docs-specs-tpm-reST-ify-TPM-documentation.patch +Patch0184: tpm-rename-TPM_TIS-into-TPM_TIS_ISA.patch +Patch0185: tpm-Use-TPMState-as-a-common-struct.patch +Patch0186: tpm-Separate-tpm_tis-common-functions-from-isa-code.patch +Patch0187: tpm-Separate-TPM_TIS-and-TPM_TIS_ISA-configs.patch +Patch0188: tpm-Add-the-SysBus-TPM-TIS-device.patch +Patch0189: hw-arm-virt-vTPM-support.patch +Patch0190: docs-specs-tpm-Document-TPM_TIS-sysbus-device-for-AR.patch +Patch0191: test-tpm-pass-optional-machine-options-to-swtpm-test.patch +Patch0192: test-tpm-tis-Get-prepared-to-share-tests-between-ISA.patch +Patch0193: test-tpm-tis-Add-Sysbus-TPM-TIS-device-test.patch +Patch0194: build-smt-processor-structure-to-support-smt-topolog.patch +Patch0195: target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch +Patch0196: target-arm-Add-ID_AA64MMFR2_EL1.patch +Patch0197: target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch +Patch0198: target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch +Patch0199: target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch +Patch0200: target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch +Patch0201: target-arm-Stop-assuming-DBGDIDR-always-exists.patch +Patch0202: target-arm-Move-DBGDIDR-into-ARMISARegisters.patch +Patch0203: target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch +Patch0204: target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch +Patch0205: target-arm-Read-debug-related-ID-registers-from-KVM.patch +Patch0206: target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch +Patch0207: target-arm-monitor-query-cpu-model-expansion-crashed.patch +Patch0208: target-arm-convert-isar-regs-to-array.patch +Patch0209: 
target-arm-parse-cpu-feature-related-options.patch +Patch0210: target-arm-register-CPU-features-for-property.patch +Patch0211: target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch +Patch0212: target-arm-introduce-CPU-feature-dependency-mechanis.patch +Patch0213: target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch +Patch0214: target-arm-Add-CPU-features-to-query-cpu-model-expan.patch +Patch0215: target-arm-Update-ID-fields.patch +Patch0216: target-arm-Add-more-CPU-features.patch +Patch0217: hw-usb-core-fix-buffer-overflow.patch +Patch0218: target-arm-ignore-evtstrm-and-cpuid-CPU-features.patch +Patch0219: Drop-bogus-IPv6-messages.patch +Patch0220: hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch +Patch0221: hw-xhci-check-return-value-of-usb_packet_map.patch +Patch0222: hw-net-xgmac-Fix-buffer-overflow-in-xgmac_enet_send.patch +Patch0223: hw-net-net_tx_pkt-fix-assertion-failure-in-net_tx_pk.patch +Patch0224: sm501-Convert-printf-abort-to-qemu_log_mask.patch +Patch0225: sm501-Shorten-long-variable-names-in-sm501_2d_operat.patch +Patch0226: sm501-Use-BIT-x-macro-to-shorten-constant.patch +Patch0227: sm501-Clean-up-local-variables-in-sm501_2d_operation.patch +Patch0228: sm501-Replace-hand-written-implementation-with-pixma.patch +Patch0229: pci-check-bus-pointer-before-dereference.patch +Patch0230: hw-ide-check-null-block-before-_cancel_dma_sync.patch +Patch0231: elf2dmp-Fix-memory-leak-on-main-error-paths.patch +Patch0232: io-Don-t-use-flag-of-printf-format.patch +Patch0233: hw-display-omap_lcdc-Fix-potential-NULL-pointer-dere.patch +Patch0234: hw-display-exynos4210_fimd-Fix-potential-NULL-pointe.patch +Patch0235: block-vvfat-Fix-bad-printf-format-specifiers.patch +Patch0236: block-Remove-unused-include.patch +Patch0237: ssi-Fix-bad-printf-format-specifiers.patch +Patch0238: net-l2tpv3-Remove-redundant-check-in-net_init_l2tpv3.patch +Patch0239: ati-check-x-y-display-parameter-values.patch +Patch0240: migration-dirtyrate-setup-up-query-dirtyrate-framwor.patch 
+Patch0241: migration-dirtyrate-add-DirtyRateStatus-to-denote-ca.patch +Patch0242: migration-dirtyrate-Add-RamblockDirtyInfo-to-store-s.patch +Patch0243: migration-dirtyrate-Add-dirtyrate-statistics-series-.patch +Patch0244: migration-dirtyrate-move-RAMBLOCK_FOREACH_MIGRATABLE.patch +Patch0245: migration-dirtyrate-Record-hash-results-for-each-sam.patch +Patch0246: migration-dirtyrate-Compare-page-hash-results-for-re.patch +Patch0247: migration-dirtyrate-skip-sampling-ramblock-with-size.patch +Patch0248: migration-dirtyrate-Implement-set_sample_page_period.patch +Patch0249: migration-dirtyrate-Implement-calculate_dirtyrate-fu.patch +Patch0250: migration-dirtyrate-Implement-qmp_cal_dirty_rate-qmp.patch +Patch0251: migration-dirtyrate-Add-trace_calls-to-make-it-easie.patch +Patch0252: migration-dirtyrate-record-start_time-and-calc_time-.patch +Patch0253: migration-dirtyrate-present-dirty-rate-only-when-que.patch +Patch0254: migration-dirtyrate-simplify-includes-in-dirtyrate.c.patch +Patch0255: migration-tls-save-hostname-into-MigrationState.patch +Patch0256: migration-tls-extract-migration_tls_client_create-fo.patch +Patch0257: migration-tls-add-tls_hostname-into-MultiFDSendParam.patch +Patch0258: migration-tls-extract-cleanup-function-for-common-us.patch +Patch0259: migration-tls-add-support-for-multifd-tls-handshake.patch +Patch0260: migration-tls-add-trace-points-for-multifd-tls.patch +Patch0261: qemu-file-Don-t-do-IO-after-shutdown.patch +Patch0262: multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch +Patch0263: migration-Don-t-send-data-if-we-have-stopped.patch +Patch0264: migration-Create-migration_is_running.patch +Patch0265: migration-fix-COLO-broken-caused-by-a-previous-commi.patch +Patch0266: migration-multifd-fix-hangup-with-TLS-Multifd-due-to.patch +Patch0267: multifd-tls-fix-memoryleak-of-the-QIOChannelSocket-o.patch +Patch0268: net-remove-an-assert-call-in-eth_get_gso_type.patch +Patch0269: json-Fix-a-memleak-in-parse_pair.patch +Patch0270: 
Bugfix-hw-acpi-Use-max_cpus-instead-of-cpus-when-bui.patch +Patch0271: slirp-check-pkt_len-before-reading-protocol-header.patch +Patch0272: hw-usb-hcd-ohci-check-for-processed-TD-before-retire.patch +Patch0273: hw-ehci-check-return-value-of-usb_packet_map.patch +Patch0274: hw-usb-hcd-ohci-check-len-and-frame_number-variables.patch +Patch0275: hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch +Patch0276: hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch +Patch0277: target-arm-Fix-write-redundant-values-to-kvm.patch +Patch0278: memory-clamp-cached-translation-in-case-it-points-to.patch +Patch0279: scsi-bus-Refactor-the-code-that-retries-requests.patch +Patch0280: scsi-disk-Add-support-for-retry-on-errors.patch +Patch0281: qapi-block-core-Add-retry-option-for-error-action.patch +Patch0282: block-backend-Introduce-retry-timer.patch +Patch0283: block-backend-Add-device-specific-retry-callback.patch +Patch0284: block-backend-Enable-retry-action-on-errors.patch +Patch0285: block-backend-Add-timeout-support-for-retry.patch +Patch0286: block-Add-error-retry-param-setting.patch +Patch0287: virtio-blk-Refactor-the-code-that-processes-queued-r.patch +Patch0288: virtio-blk-On-restart-process-queued-requests-in-the.patch +Patch0289: virtio_blk-Add-support-for-retry-on-errors.patch +Patch0290: migration-Add-multi-thread-compress-method.patch +Patch0291: migration-Refactoring-multi-thread-compress-migratio.patch +Patch0292: migration-Add-multi-thread-compress-ops.patch +Patch0293: migration-Add-zstd-support-in-multi-thread-compressi.patch +Patch0294: migration-Add-compress_level-sanity-check.patch +Patch0295: doc-Update-multi-thread-compression-doc.patch +Patch0296: configure-Enable-test-and-libs-for-zstd.patch +Patch0297: ati-use-vga_read_byte-in-ati_cursor_define.patch +Patch0298: sd-sdhci-assert-data_count-is-within-fifo_buffer.patch +Patch0299: msix-add-valid.accepts-methods-to-check-address.patch +Patch0300: 
ide-atapi-check-io_buffer_index-in-ide_atapi_cmd_rep.patch +Patch0301: block-backend-Stop-retrying-when-draining.patch +Patch0302: migration-fix-memory-leak-in-qmp_migrate_set_paramet.patch +Patch0303: migration-tls-fix-inverted-semantics-in-multifd_chan.patch +Patch0304: migration-tls-add-error-handling-in-multifd_tls_hand.patch +Patch0305: net-vmxnet3-validate-configuration-values-during-act.patch +Patch0306: block-Add-sanity-check-when-setting-retry-parameters.patch +Patch0307: hw-pci-host-add-pci-intack-write-method.patch +Patch0308: pci-host-add-pcie-msi-read-method.patch +Patch0309: vfio-add-quirk-device-write-method.patch +Patch0310: prep-add-ppc-parity-write-method.patch +Patch0311: nvram-add-nrf51_soc-flash-read-method.patch +Patch0312: spapr_pci-add-spapr-msi-read-method.patch +Patch0313: tz-ppc-add-dummy-read-write-methods.patch +Patch0314: imx7-ccm-add-digprog-mmio-write-method.patch +Patch0315: util-cacheinfo-fix-crash-when-compiling-with-uClibc.patch +Patch0316: arm-cpu-Fixed-function-undefined-error-at-compile-ti.patch +Patch0317: blockjob-Fix-crash-with-IOthread-when-block-commit-a.patch +Patch0318: vhost-user-gpu-fix-resource-leak-in-vg_resource_crea.patch +Patch0319: vhost-user-gpu-fix-memory-leak-in-vg_resource_attach.patch +Patch0320: vhost-user-gpu-fix-memory-leak-while-calling-vg_reso.patch +Patch0321: vhost-user-gpu-fix-memory-leak-in-virgl_cmd_resource.patch +Patch0322: vhost-user-gpu-fix-memory-leak-in-virgl_resource_att.patch +Patch0323: vhost-user-gpu-fix-memory-disclosure-in-virgl_cmd_ge.patch +Patch0324: vhost-user-gpu-fix-OOB-write-in-virgl_cmd_get_capset.patch +Patch0325: ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch +Patch0326: hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch +Patch0327: usb-limit-combined-packets-to-1-MiB-CVE-2021-3527.patch +Patch0328: hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch +Patch0329: x86-Intel-AVX512_BF16-feature-enabling.patch +Patch0330: 
i386-Add-MSR-feature-bit-for-MDS-NO.patch +Patch0331: i386-Add-macro-for-stibp.patch +Patch0332: i386-Add-new-CPU-model-Cooperlake.patch +Patch0333: target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch +Patch0334: target-i386-Add-missed-security-features-to-Cooperla.patch +Patch0335: target-i386-add-PSCHANGE_NO-bit-for-the-ARCH_CAPABIL.patch +Patch0336: target-i386-Export-TAA_NO-bit-to-guests.patch +Patch0337: target-i386-Introduce-Denverton-CPU-model.patch +Patch0338: target-i386-Add-Snowridge-v2-no-MPX-CPU-model.patch +Patch0339: i386-Add-CPUID-bit-for-CLZERO-and-XSAVEERPTR.patch +Patch0340: crypto-add-support-for-nettle-s-native-XTS-impl.patch +Patch0341: crypto-add-support-for-gcrypt-s-native-XTS-impl.patch +Patch0342: tests-benchmark-crypto-with-fixed-data-size-not-time.patch +Patch0343: tests-allow-filtering-crypto-cipher-benchmark-tests.patch +Patch0344: target-i386-handle-filtered_features-in-a-new-functi.patch +Patch0345: target-i386-introduce-generic-feature-dependency-mec.patch +Patch0346: target-i386-expand-feature-words-to-64-bits.patch +Patch0347: target-i386-add-VMX-definitions.patch +Patch0348: vmxcap-correct-the-name-of-the-variables.patch +Patch0349: target-i386-add-VMX-features.patch +Patch0350: target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch +Patch0351: target-i386-add-VMX-features-to-named-CPU-models.patch +Patch0352: target-i386-add-two-missing-VMX-features-for-Skylake.patch +Patch0353: target-i386-disable-VMX-features-if-nested-0.patch +Patch0354: i386-cpu-Don-t-add-unavailable_features-to-env-user_.patch +Patch0355: target-i386-do-not-set-unsupported-VMX-secondary-exe.patch +Patch0356: migration-fix-multifd_send_pages-next-channel.patch +Patch0357: migration-Make-sure-that-we-don-t-call-write-in-case.patch +Patch0358: virtio-don-t-enable-notifications-during-polling.patch +Patch0359: usbredir-Prevent-recursion-in-usbredir_write.patch +Patch0360: xhci-recheck-slot-status.patch +Patch0361: 
vhost-Add-names-to-section-rounded-warning.patch +Patch0362: vhost-user-Print-unexpected-slave-message-types.patch +Patch0363: contrib-libvhost-user-Protect-slave-fd-with-mutex.patch +Patch0364: libvhost-user-Fix-some-memtable-remap-cases.patch +Patch0365: xics-Don-t-deassert-outputs.patch +Patch0366: i386-Resolve-CPU-models-to-v1-by-default.patch +Patch0367: block-curl-HTTP-header-fields-allow-whitespace-aroun.patch +Patch0368: block-curl-HTTP-header-field-names-are-case-insensit.patch +Patch0369: backup-Improve-error-for-bdrv_getlength-failure.patch +Patch0370: mirror-Make-sure-that-source-and-target-size-match.patch +Patch0371: iotests-143-Create-socket-in-SOCK_DIR.patch +Patch0372: nbd-server-Avoid-long-error-message-assertions-CVE-2.patch +Patch0373: block-Call-attention-to-truncation-of-long-NBD-expor.patch +Patch0374: qemu-img-convert-Don-t-pre-zero-images.patch +Patch0375: qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch +Patch0376: mirror-Wait-only-for-in-flight-operations.patch +Patch0377: virtio-net-delete-also-control-queue-when-TX-RX-dele.patch +Patch0378: target-i386-enable-monitor-and-ucode-revision-with-c.patch +Patch0379: target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch +Patch0380: target-i386-kvm-initialize-feature-MSRs-very-early.patch +Patch0381: target-i386-add-a-ucode-rev-property.patch +Patch0382: migration-use-migration_is_active-to-represent-activ.patch +Patch0383: migration-Rate-limit-inside-host-pages.patch +Patch0384: hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch +Patch0385: qapi-block-core-Introduce-BackupCommon.patch +Patch0386: drive-backup-create-do_backup_common.patch +Patch0387: blockdev-backup-utilize-do_backup_common.patch +Patch0388: qapi-add-BitmapSyncMode-enum.patch +Patch0389: block-backup-Add-mirror-sync-mode-bitmap.patch +Patch0390: block-backup-add-never-policy-to-bitmap-sync-mode.patch +Patch0391: block-backup-loosen-restriction-on-readonly-bitmaps.patch +Patch0392: 
block-backup-hoist-bitmap-check-into-QMP-interface.patch +Patch0393: block-backup-deal-with-zero-detection.patch +Patch0394: mirror-Fix-bdrv_has_zero_init-use.patch +Patch0395: blockdev-fix-coding-style-issues-in-drive_backup_pre.patch +Patch0396: blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch +Patch0397: blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch +Patch0398: blockdev-honor-bdrv_try_set_aio_context-context-requ.patch +Patch0399: blockdev-Return-bs-to-the-proper-context-on-snapshot.patch +Patch0400: block-Fix-cross-AioContext-blockdev-snapshot.patch +Patch0401: vl-Don-t-mismatch-g_strsplit-g_free.patch +Patch0402: seqlock-fix-seqlock_write_unlock_impl-function.patch +Patch0403: target-i386-kvm-initialize-microcode-revision-from-K.patch +Patch0404: target-i386-check-for-availability-of-MSR_IA32_UCODE.patch +Patch0405: hw-arm-virt-Init-PMU-for-hotplugged-vCPU.patch +Patch0406: Fixed-integer-overflow-in-e1000e.patch +Patch0407: migration-fix-cleanup_bh-leak-on-resume.patch +Patch0408: qmp-fix-leak-on-callbacks-that-return-both-value-and.patch +Patch0409: qga-commands-posix-fix-use-after-free-of-local_err.patch +Patch0410: file-posix-Fix-leaked-fd-in-raw_open_common-error-pa.patch +Patch0411: object-return-self-in-object_ref.patch +Patch0412: lm32-do-not-leak-memory-on-object_new-object_unref.patch +Patch0413: cris-do-not-leak-struct-cris_disasm_data.patch +Patch0414: hppa-fix-leak-from-g_strdup_printf.patch +Patch0415: mcf5208-fix-leak-from-qemu_allocate_irqs.patch +Patch0416: microblaze-fix-leak-of-fdevice-tree-blob.patch +Patch0417: ide-fix-leak-from-qemu_allocate_irqs.patch +Patch0418: make-check-unit-use-after-free-in-test-opts-visitor.patch +Patch0419: xhci-fix-valid.max_access_size-to-access-address-reg.patch +Patch0420: qga-fix-assert-regression-on-guest-shutdown.patch +Patch0421: char-fix-use-after-free-with-dup-chardev-reconnect.patch +Patch0422: migration-Count-new_dirty-instead-of-real_dirty.patch +Patch0423: 
qga-Plug-unlikely-memory-leak-in-guest-set-memory-bl.patch +Patch0424: chardev-tcp-Fix-error-message-double-free-error.patch +Patch0425: colo-compare-Fix-memory-leak-in-packet_enqueue.patch +Patch0426: hw-block-nvme-fix-pin-based-interrupt-behavior.patch +Patch0427: hw-block-nvme-fix-pci-doorbell-size-calculation.patch +Patch0428: virtio-pci-fix-queue_enable-write.patch +Patch0429: hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch +Patch0430: linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch +Patch0431: migration-rdma-cleanup-rdma-context-before-g_free-to.patch +Patch0432: pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch +Patch0433: block-qcow2-do-free-crypto_opts-in-qcow2_close.patch +Patch0434: qemu-img-free-memory-before-re-assign.patch +Patch0435: block-qcow2-threads-fix-qcow2_decompress.patch +Patch0436: block-Avoid-memleak-on-qcow2-image-info-failure.patch +Patch0437: block-bdrv_set_backing_bs-fix-use-after-free.patch +Patch0438: hmp-vnc-Fix-info-vnc-list-leak.patch +Patch0439: migration-colo-fix-use-after-free-of-local_err.patch +Patch0440: migration-ram-fix-use-after-free-of-local_err.patch +Patch0441: block-mirror-fix-use-after-free-of-local_err.patch +Patch0442: block-fix-bdrv_root_attach_child-forget-to-unref-chi.patch +Patch0443: virtio-serial-bus-Plug-memory-leak-on-realize-error-.patch +Patch0444: virtio-blk-delete-vqs-on-the-error-path-in-realize.patch +Patch0445: fix-vhost_user_blk_watch-crash.patch +Patch0446: vhost-user-blk-delay-vhost_user_blk_disconnect.patch +Patch0447: usbredir-fix-buffer-overflow-on-vmload.patch +Patch0448: display-bochs-display-fix-memory-leak.patch +Patch0449: audio-fix-integer-overflow.patch +Patch0450: migration-multifd-clean-pages-after-filling-packet.patch +Patch0451: migration-multifd-not-use-multifd-during-postcopy.patch +Patch0452: migration-Define-VMSTATE_INSTANCE_ID_ANY.patch +Patch0453: migration-Change-SaveStateEntry.instance_id-into-uin.patch +Patch0454: 
apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch +Patch0455: virtio-add-ability-to-delete-vq-through-a-pointer.patch +Patch0456: virtio-pmem-do-delete-rq_vq-in-virtio_pmem_unrealize.patch +Patch0457: virtio-crypto-do-delete-ctrl_vq-in-virtio_crypto_dev.patch +Patch0458: vhost-user-blk-delete-virtioqueues-in-unrealize-to-f.patch +Patch0459: vhost-user-blk-convert-to-new-virtio_delete_queue.patch +Patch0460: block-nbd-extract-the-common-cleanup-code.patch +Patch0461: virtio-gracefully-handle-invalid-region-caches.patch +Patch0462: migration-savevm-release-gslist-after-dump_vmstate_j.patch +Patch0463: virtio-input-fix-memory-leak-on-unrealize.patch +Patch0464: target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch +Patch0465: target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch +Patch0466: target-arm-Update-the-ID-registers-of-Kunpeng-920.patch +Patch0467: hw-net-fix-vmxnet3-live-migration.patch +Patch0468: include-Make-headers-more-self-contained.patch +Patch0469: migration-register_savevm_live-doesn-t-need-dev.patch +Patch0470: vmstate-add-qom-interface-to-get-id.patch +Patch0471: linux-headers-Update-against-Add-migration-support-f.patch +Patch0472: vfio-Add-function-to-unmap-VFIO-region.patch +Patch0473: vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch +Patch0474: vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch +Patch0475: vfio-Add-migration-region-initialization-and-finaliz.patch +Patch0476: vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch +Patch0477: vfio-Add-migration-state-change-notifier.patch +Patch0478: vfio-Register-SaveVMHandlers-for-VFIO-device.patch +Patch0479: vfio-Add-save-state-functions-to-SaveVMHandlers.patch +Patch0480: vfio-Add-load-state-functions-to-SaveVMHandlers.patch +Patch0481: memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch +Patch0482: vfio-Get-migration-capability-flags-for-container.patch +Patch0483: vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch +Patch0484: 
vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch +Patch0485: vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch +Patch0486: vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch +Patch0487: vfio-Make-vfio-pci-device-migration-capable.patch +Patch0488: qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch +Patch0489: vfio-Move-the-saving-of-the-config-space-to-the-righ.patch +Patch0490: vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch +Patch0491: vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch +Patch0492: kvm-split-too-big-memory-section-on-several-memslots.patch +Patch0493: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +Patch0494: accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch +Patch0495: memory-Skip-dirty-tracking-for-un-migratable-memory-.patch +Patch0496: Fix-use-after-free-in-vfio_migration_probe.patch +Patch0497: vfio-Make-migration-support-experimental.patch +Patch0498: vfio-Change-default-dirty-pages-tracking-behavior-du.patch +Patch0499: vfio-Fix-vfio_listener_log_sync-function-name-typo.patch +Patch0500: vfio-Support-host-translation-granule-size.patch +Patch0501: vfio-migrate-Move-switch-of-dirty-tracking-into-vfio.patch +Patch0502: vfio-Fix-unregister-SaveVMHandler-in-vfio_migration_.patch +Patch0503: migration-ram-Reduce-unnecessary-rate-limiting.patch +Patch0504: migration-ram-Optimize-ram_save_host_page.patch +Patch0505: qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch +Patch0506: linux-headers-update-against-5.10-and-manual-clear-v.patch +Patch0507: vfio-Maintain-DMA-mapping-range-for-the-container.patch +Patch0508: vfio-migration-Add-support-for-manual-clear-vfio-dir.patch +Patch0509: hw-arm-smmuv3-Support-16K-translation-granule.patch +Patch0510: hw-arm-smmuv3-Set-the-restoration-priority-of-the-vS.patch +Patch0511: hw-vfio-common-trace-vfio_connect_container-operatio.patch +Patch0512: update-linux-headers-Import-iommu.h.patch +Patch0513: 
vfio.h-and-iommu.h-header-update-against-5.10.patch +Patch0514: memory-Add-new-fields-in-IOTLBEntry.patch +Patch0515: hw-arm-smmuv3-Improve-stage1-ASID-invalidation.patch +Patch0516: hw-arm-smmu-common-Allow-domain-invalidation-for-NH_.patch +Patch0517: memory-Add-IOMMU_ATTR_VFIO_NESTED-IOMMU-memory-regio.patch +Patch0518: memory-Add-IOMMU_ATTR_MSI_TRANSLATE-IOMMU-memory-reg.patch +Patch0519: memory-Introduce-IOMMU-Memory-Region-inject_faults-A.patch +Patch0520: iommu-Introduce-generic-header.patch +Patch0521: pci-introduce-PCIPASIDOps-to-PCIDevice.patch +Patch0522: vfio-Force-nested-if-iommu-requires-it.patch +Patch0523: vfio-Introduce-hostwin_from_range-helper.patch +Patch0524: vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch +Patch0525: vfio-Set-up-nested-stage-mappings.patch +Patch0526: vfio-Pass-stage-1-MSI-bindings-to-the-host.patch +Patch0527: vfio-Helper-to-get-IRQ-info-including-capabilities.patch +Patch0528: vfio-pci-Register-handler-for-iommu-fault.patch +Patch0529: vfio-pci-Set-up-the-DMA-FAULT-region.patch +Patch0530: vfio-pci-Implement-the-DMA-fault-handler.patch +Patch0531: hw-arm-smmuv3-Advertise-MSI_TRANSLATE-attribute.patch +Patch0532: hw-arm-smmuv3-Store-the-PASID-table-GPA-in-the-trans.patch +Patch0533: hw-arm-smmuv3-Fill-the-IOTLBEntry-arch_id-on-NH_VA-i.patch +Patch0534: hw-arm-smmuv3-Fill-the-IOTLBEntry-leaf-field-on-NH_V.patch +Patch0535: hw-arm-smmuv3-Pass-stage-1-configurations-to-the-hos.patch +Patch0536: hw-arm-smmuv3-Implement-fault-injection.patch +Patch0537: hw-arm-smmuv3-Allow-MAP-notifiers.patch +Patch0538: pci-Add-return_page_response-pci-ops.patch +Patch0539: vfio-pci-Implement-return_page_response-page-respons.patch +Patch0540: vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch +Patch0541: vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch +Patch0542: vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch +Patch0543: vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch +Patch0544: 
vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch +Patch0545: hw-arm-smmuv3-Post-load-stage-1-configurations-to-th.patch +Patch0546: usbredir-fix-free-call.patch +Patch0547: vfio-common-Fix-incorrect-address-alignment-in-vfio_.patch +Patch0548: vfio-common-Add-address-alignment-check-in-vfio_list.patch +Patch0549: uas-add-stream-number-sanity-checks.patch +Patch0550: virtio-net-fix-use-after-unmap-free-for-sg.patch +Patch0551: Add-mtod_check.patch +Patch0552: bootp-limit-vendor-specific-area-to-input-packet-mem.patch +Patch0553: bootp-check-bootp_input-buffer-size.patch +Patch0554: upd6-check-udp6_input-buffer-size.patch +Patch0555: tftp-check-tftp_input-buffer-size.patch +Patch0556: tftp-introduce-a-header-structure.patch BuildRequires: flex +BuildRequires: gcc BuildRequires: bison BuildRequires: texinfo BuildRequires: perl-podlators @@ -72,6 +578,7 @@ BuildRequires: gettext BuildRequires: python-sphinx BuildRequires: zlib-devel +BuildRequires: zstd-devel >= 1.4.5 BuildRequires: gtk3-devel BuildRequires: gnutls-devel BuildRequires: numactl-devel @@ -103,17 +610,27 @@ BuildRequires: libudev-devel BuildRequires: pam-devel BuildRequires: perl-Test-Harness BuildRequires: python3-devel +BuildRequires: librbd-devel +BuildRequires: krb5-devel +BuildRequires: libssh-devel +BuildRequires: glib2 +BuildRequires: spice-server-devel >= 0.12.5 +BuildRequires: spice-protocol >= 0.12.3 %ifarch aarch64 BuildRequires: libfdt-devel BuildRequires: virglrenderer-devel %endif +Requires: zstd >= 1.4.5 Requires(post): /usr/bin/getent Requires(post): /usr/sbin/groupadd Requires(post): /usr/sbin/useradd Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units +Requires(postun): qemu-block-iscsi +Requires(postun): qemu-block-curl + %description QEMU is a FAST! processor emulator using dynamic translation to achieve good emulation speed. 
@@ -150,6 +667,26 @@ Summary: QEMU command line tool for manipulating disk images %description img This package provides a command line tool for manipulating disk images +%package block-rbd +Summary: Qemu-block-rbd +%description block-rbd +This package provides RBD support for Qemu + +%package block-ssh +Summary: Qemu-block-ssh +%description block-ssh +This package provides block-ssh support for Qemu + +%package block-iscsi +Summary: Qemu-block-iscsi +%description block-iscsi +This package provides block-iscsi support for Qemu + +%package block-curl +Summary: Qemu-block-curl +%description block-curl +This package provides block-curl support for Qemu + %ifarch %{ix86} x86_64 %package seabios Summary: QEMU seabios @@ -186,8 +723,6 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --firmwarepath=%{_datadir}/%{name} \ --with-pkgversion=%{name}-%{version}-%{release} \ --python=/usr/bin/python3 \ - --disable-strip \ - --disable-werror \ --disable-slirp \ --enable-gtk \ --enable-docs \ @@ -203,6 +738,10 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --enable-linux-aio \ --enable-cap-ng \ --enable-vhost-user \ + --enable-tpm \ + --enable-modules \ + --enable-libssh \ + --enable-spice \ %ifarch aarch64 --enable-fdt \ --enable-virglrenderer \ @@ -218,7 +757,8 @@ buildldflags="VL_LDFLAGS=-Wl,--build-id" --disable-parallels \ --disable-sheepdog \ --disable-capstone \ - --disable-smartcard + --disable-smartcard \ + --enable-zstd make %{?_smp_mflags} $buildldflags V=1 @@ -247,7 +787,9 @@ install -D -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevdi mkdir -p %{buildroot}%{_localstatedir}/log touch %{buildroot}%{_localstatedir}/log/qga-fsfreeze-hook.log +# For qemu docs package %global qemudocdir %{_docdir}/%{name} +rm -rf %{buildroot}%{qemudocdir}/specs install -D -p -m 0644 -t %{buildroot}%{qemudocdir} Changelog README COPYING COPYING.LIB LICENSE chmod -x %{buildroot}%{_mandir}/man1/* @@ -262,6 +804,9 @@ rm -rf %{buildroot}%{_datadir}/%{name}/multiboot.bin rm -rf 
%{buildroot}%{_datadir}/%{name}/linuxboot_dma.bin rm -rf %{buildroot}%{_datadir}/%{name}/pvh.bin %endif +%ifarch x86_64 +rm -rf %{buildroot}%{_datadir}/%{name}/vgabios-ati.bin +%endif rm -rf %{buildroot}%{_datadir}/%{name}/openbios-* rm -rf %{buildroot}%{_datadir}/%{name}/slof.bin rm -rf %{buildroot}%{_datadir}/%{name}/QEMU,*.bin @@ -277,6 +822,23 @@ rm -rf %{buildroot}%{_datadir}/%{name}/skiboot.lid rm -rf %{buildroot}%{_datadir}/%{name}/spapr-* rm -rf %{buildroot}%{_datadir}/%{name}/u-boot* rm -rf %{buildroot}%{_bindir}/ivshmem* +rm -f %{buildroot}%{_datadir}/%{name}/edk2* +rm -rf %{buildroot}%{_datadir}/%{name}/firmware +rm -rf %{buildroot}%{_datadir}/%{name}/opensbi* +rm -rf %{buildroot}%{_datadir}/%{name}/qemu-nsis.bmp +rm -rf %{buildroot}%{_libdir}/%{name}/audio-oss.so +rm -rf %{buildroot}%{_libdir}/%{name}/audio-pa.so +rm -rf %{buildroot}%{_libdir}/%{name}/block-gluster.so +rm -rf %{buildroot}%{_libdir}/%{name}/ui-curses.so +rm -rf %{buildroot}%{_libdir}/%{name}/ui-gtk.so +rm -rf %{buildroot}%{_libdir}/%{name}/ui-sdl.so +rm -rf %{buildroot}%{_libexecdir}/vhost-user-gpu +rm -rf %{buildroot}%{_datadir}/%{name}/vhost-user/50-qemu-gpu.json + +strip %{buildroot}%{_libdir}/%{name}/block-rbd.so +strip %{buildroot}%{_libdir}/%{name}/block-iscsi.so +strip %{buildroot}%{_libdir}/%{name}/block-curl.so +strip %{buildroot}%{_libdir}/%{name}/block-ssh.so for f in %{buildroot}%{_bindir}/* %{buildroot}%{_libdir}/* \ %{buildroot}%{_libexecdir}/*; do @@ -389,6 +951,18 @@ getent passwd qemu >/dev/null || \ %{_bindir}/qemu-io %{_bindir}/qemu-nbd +%files block-rbd +%{_libdir}/%{name}/block-rbd.so + +%files block-ssh +%{_libdir}/%{name}/block-ssh.so + +%files block-iscsi +%{_libdir}/%{name}/block-iscsi.so + +%files block-curl +%{_libdir}/%{name}/block-curl.so + %ifarch %{ix86} x86_64 %files seabios %{_datadir}/%{name}/bios-256k.bin @@ -396,89 +970,681 @@ getent passwd qemu >/dev/null || \ %endif %changelog -* Wed Mar 18 2020 Huawei Technologies Co., Ltd. 
+* Tue Oct 26 2021 imxcc +- fix cve-2021-3592 cve-2021-3593 cve-2021-3595 + +* Sun Sep 26 2021 Chen Qun +- virtio-net: fix use after unmap/free for sg + +* Thu Sep 16 2021 Chen Qun +- uas: add stream number sanity checks. + +* Tue Sep 14 2021 Chen Qun +- vfio/common: Add address alignment check in vfio_listener_region_del + +* Wed Sep 08 2021 Chen Qun +- vfio/common: Fix incorrect address alignment in vfio_dma_map_ram_section + +* Mon Aug 30 2021 Zhongrui Tang +- qemu.spec: Specify the package version of zstd and zstd-devel that qemu depends on. + +* Thu Aug 19 2021 Jiajie Li +- add qemu-block-curl package +- add qemu-block-curl requirement for qemu. + +* Mon Aug 16 2021 Chen Qun +- usbredir: fix free call + +* Wed Aug 04 2021 Chen Qun +- vfio: Support host translation granule size +- vfio/migrate: Move switch of dirty tracking into vfio_memory_listener +- vfio: Fix unregister SaveVMHandler in vfio_migration_finalize +- migration/ram: Reduce unnecessary rate limiting +- migration/ram: Optimize ram_save_host_page() +- qdev/monitors: Fix reundant error_setg of qdev_add_device +- linux-headers: update against 5.10 and manual clear vfio dirty log series +- vfio: Maintain DMA mapping range for the container +- vfio/migration: Add support for manual clear vfio dirty log +- hw/arm/smmuv3: Support 16K translation granule +- hw/arm/smmuv3: Set the restoration priority of the vSMMUv3 explicitly +- hw/vfio/common: trace vfio_connect_container operations +- update-linux-headers: Import iommu.h +- vfio.h and iommu.h header update against 5.10 +- memory: Add new fields in IOTLBEntry +- hw/arm/smmuv3: Improve stage1 ASID invalidation +- hw/arm/smmu-common: Allow domain invalidation for NH_ALL/NSNH_ALL +- memory: Add IOMMU_ATTR_VFIO_NESTED IOMMU memory region attribute +- memory: Add IOMMU_ATTR_MSI_TRANSLATE IOMMU memory region attribute +- memory: Introduce IOMMU Memory Region inject_faults API +- iommu: Introduce generic header +- pci: introduce PCIPASIDOps to PCIDevice +- vfio: 
Force nested if iommu requires it +- vfio: Introduce hostwin_from_range helper +- vfio: Introduce helpers to DMA map/unmap a RAM section +- vfio: Set up nested stage mappings +- vfio: Pass stage 1 MSI bindings to the host +- vfio: Helper to get IRQ info including capabilities +- vfio/pci: Register handler for iommu fault +- vfio/pci: Set up the DMA FAULT region +- vfio/pci: Implement the DMA fault handler +- hw/arm/smmuv3: Advertise MSI_TRANSLATE attribute +- hw/arm/smmuv3: Store the PASID table GPA in the translation config +- hw/arm/smmuv3: Fill the IOTLBEntry arch_id on NH_VA invalidation +- hw/arm/smmuv3: Fill the IOTLBEntry leaf field on NH_VA invalidation +- hw/arm/smmuv3: Pass stage 1 configurations to the host +- hw/arm/smmuv3: Implement fault injection +- hw/arm/smmuv3: Allow MAP notifiers +- pci: Add return_page_response pci ops +- vfio/pci: Implement return_page_response page response callback +- vfio/common: Avoid unmap ram section at vfio_listener_region_del() in nested mode +- vfio: Introduce helpers to mark dirty pages of a RAM section +- vfio: Add vfio_prereg_listener_log_sync in nested stage +- vfio: Add vfio_prereg_listener_log_clear to re-enable mark dirty pages +- vfio: Add vfio_prereg_listener_global_log_start/stop in nested stage +- hw/arm/smmuv3: Post-load stage 1 configurations to the host + +* Tue Aug 03 2021 Chen Qun +- kvm: split too big memory section on several memslots +- kvm: Reallocate dirty_bmap when we change a slot +- accel: kvm: Fix memory waste under mismatch page size +- memory: Skip dirty tracking for un-migratable memory regions +- Fix use after free in vfio_migration_probe +- vfio: Make migration support experimental +- vfio: Change default dirty pages tracking behavior during migration +- vfio: Fix vfio_listener_log_sync function name typo + +* Thu Jul 29 2021 Chen Qun +- vfio: Move the saving of the config space to the right place in VFIO migration +- vfio: Set the priority of the VFIO VM state change handler explicitly +- 
vfio: Avoid disabling and enabling vectors repeatedly in VFIO migration + +* Thu Jul 29 2021 imxcc +- hw/net: fix vmxnet3 live migration +- include: Make headers more self-contained +- migration: register_savevm_live doesn't need dev +- vmstate: add qom interface to get id +- linux headers: Update against "Add migration support for VFIO devices" +- vfio: Add function to unmap VFIO region +- vfio: Add vfio_get_object callback to VFIODeviceOps +- vfio: Add save and load functions for VFIO PCI devices +- vfio: Add migration region initialization and finalize function +- vfio: Add VM state change handler to know state of VM +- vfio: Add migration state change notifier +- vfio: Register SaveVMHandlers for VFIO device +- vfio: Add save state functions to SaveVMHandlers +- vfio: Add load state functions to SaveVMHandlers +- memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled +- vfio: Get migration capability flags for container +- vfio: Add function to start and stop dirty pages tracking +- vfio: Add vfio_listener_log_sync to mark dirty pages +- vfio: Dirty page tracking when vIOMMU is enabled +- vfio: Add ioctl to get dirty pages bitmap during dma unmap +- vfio: Make vfio-pci device migration capable +- qapi: Add VFIO devices migration stats in Migration stats + +* Wed Jul 28 2021 imxcc +- object: return self in object_ref() +- file-posix: Fix leaked fd in raw_open_common() error path +- qga/commands-posix: fix use after free of local_err +- qmp: fix leak on callbacks that return both value and error +- migration: fix cleanup_bh leak on resume +- Fixed integer overflow in e1000e +- lm32-do-not-leak-memory-on-object_new-object_unref.patch +- cris-do-not-leak-struct-cris_disasm_data.patch +- hppa-fix-leak-from-g_strdup_printf.patch +- mcf5208-fix-leak-from-qemu_allocate_irqs.patch +- microblaze-fix-leak-of-fdevice-tree-blob.patch +- ide-fix-leak-from-qemu_allocate_irqs.patch +- make-check-unit-use-after-free-in-test-opts-visitor.patch +- virtio-pci: fix queue_enable 
write +- hw/block/nvme: fix pci doorbell size calculation +- hw/block/nvme: fix pin-based interrupt behavior +- colo-compare: Fix memory leak in packet_enqueue() +- chardev/tcp: Fix error message double free error +- qga: Plug unlikely memory leak in guest-set-memory-blocks +- migration: Count new_dirty instead of real_dirty +- char: fix use-after-free with dup chardev & reconnect +- qga: fix assert regression on guest-shutdown +- xhci: fix valid.max_access_size to access address registers +- block/qcow2: do free crypto_opts in qcow2_close() +- qemu-img: free memory before re-assign +- block/qcow2-threads: fix qcow2_decompress +- block: Avoid memleak on qcow2 image info failure +- block: bdrv_set_backing_bs: fix use-after-free +- hmp/vnc: Fix info vnc list leak +- migration/colo: fix use after free of local_err +- migration/ram: fix use after free of local_err +- block/mirror: fix use after free of local_err +- block: fix bdrv_root_attach_child forget to unref child_bs +- virtio-serial-bus: Plug memory leak on realize() error paths +- virtio-blk: delete vqs on the error path in realize() +- fix vhost_user_blk_watch crash +- vhost-user-blk: delay vhost_user_blk_disconnect +- hw-pci-pci_bridge-Correct-pci_bridge_io-memory-regio.patch +- linux-user-mmap.c-fix-integer-underflow-in-target_mr.patch +- migration-rdma-cleanup-rdma-context-before-g_free-to.patch +- pc-bios-s390-ccw-net-fix-a-possible-memory-leak-in-g.patch +- usbredir-fix-buffer-overflow-on-vmload.patch +- apic: Use 32bit APIC ID for migration instance-ID +- audio: fix integer overflow +- display/bochs-display: fix memory leak +- migration: Change SaveStateEntry.instance_id into uint32_t +- migration: Define VMSTATE_INSTANCE_ID_ANY +- migration/multifd: clean pages after filling packet +- migration/multifd: not use multifd during postcopy +- virtio: add ability to delete vq through a pointer +- virtio-pmem: do delete rq_vq in virtio_pmem_unrealize +- virtio-crypto: do delete ctrl_vq in 
virtio_crypto_device_unrealize +- vhost-user-blk: delete virtioqueues in unrealize to fix memleaks +- vhost-user-blk: convert to new virtio_delete_queue +- block/nbd: extract the common cleanup code +- virtio: gracefully handle invalid region caches +- migration/savevm: release gslist after dump_vmstate_json +- virtio-input: fix memory leak on unrealize +- target/arm: only set ID_PFR1_EL1.GIC for AArch32 guest +- target/arm: clear EL2 and EL3 only when kvm is not enabled +- target/arm: Update the ID registers of Kunpeng-920 + +* Fri Jul 23 2021 imxcc +- hw/arm/virt: Init PMU for hotplugged vCPU + +* Fri Jul 23 2021 Chen Qun +- vl: Don't mismatch g_strsplit()/g_free() +- seqlock: fix seqlock_write_unlock_impl function +- target/i386: kvm: initialize microcode revision from KVM +- target/i386: check for availability of MSR_IA32_UCODE_REV as an emulated MSR + +* Thu Jul 22 2021 Chen Qun +- qapi/block-core: Introduce BackupCommon +- drive-backup: create do_backup_common +- blockdev-backup: utilize do_backup_common +- qapi: add BitmapSyncMode enum +- block/backup: Add mirror sync mode 'bitmap' +- block/backup: add 'never' policy to bitmap sync mode +- block/backup: loosen restriction on readonly bitmaps +- block/backup: hoist bitmap check into QMP interface +- block/backup: deal with zero detection +- mirror: Fix bdrv_has_zero_init() use +- blockdev: fix coding style issues in drive_backup_prepare +- blockdev: unify qmp_drive_backup and drive-backup transaction paths +- blockdev: unify qmp_blockdev_backup and blockdev-backup transaction paths +- blockdev: honor bdrv_try_set_aio_context() context requirements +- blockdev: Return bs to the proper context on snapshot abort +- block: Fix cross-AioContext blockdev-snapshot + +* Thu Jul 22 2021 Chen Qun +- hw/pci/pcie: Move hot plug capability check to pre_plug callback + +* Thu Jul 22 2021 Chen Qun +- migration: use migration_is_active to represent active state +- migration: Rate limit inside host pages + +* Thu Jul 22 2021 
Chen Qun +- virtio-net: delete also control queue when TX/RX deleted +- target/i386: enable monitor and ucode revision with -cpu max +- target/i386: set the CPUID level to 0x14 on old machine-type +- target/i386: kvm: initialize feature MSRs very early +- target/i386: add a ucode-rev property + +* Thu Jul 22 2021 Chen Qun +- qcow2: Fix qcow2_alloc_cluster_abort() for external data file +- mirror: Wait only for in-flight operations + +* Wed Jul 21 2021 Chen Qun +- block/curl: HTTP header fields allow whitespace around values +- block/curl: HTTP header field names are case insensitive +- backup: Improve error for bdrv_getlength() failure +- mirror: Make sure that source and target size match +- iotests/143: Create socket in $SOCK_DIR +- nbd/server: Avoid long error message assertions CVE-2020-10761 +- block: Call attention to truncation of long NBD exports +- qemu-img convert: Don't pre-zero images + +* Wed Jul 21 2021 Chen Qun +- virtio: don't enable notifications during polling +- usbredir: Prevent recursion in usbredir_write +- xhci: recheck slot status +- vhost: Add names to section rounded warning +- vhost-user: Print unexpected slave message types +- contrib/libvhost-user: Protect slave fd with mutex +- libvhost-user: Fix some memtable remap cases +- xics: Don't deassert outputs +- i386: Resolve CPU models to v1 by default + +* Wed Jul 21 2021 imxcc +- target/i386: handle filtered_features in a new function mark_unavailable_features +- target/i386: introduce generic feature dependency mechanism +- target/i386: expand feature words to 64 bits +- target/i386: add VMX definitions +- vmxcap: correct the name of the variables +- target/i386: add VMX features +- target/i386: work around KVM_GET_MSRS bug for secondary execution controls +- target/i386: add VMX features to named CPU models +- target/i386: add two missing VMX features for Skylake and CascadeLake Server +- target/i386: disable VMX features if nested=0 +- i386/cpu: Don't add unavailable_features to 
env->user_features +- target/i386: do not set unsupported VMX secondary execution controls +- migration: fix multifd_send_pages() next channel +- migration: Make sure that we don't call write() in case of error + +* Tue Jul 20 2021 Chen Qun +- crypto: add support for nettle's native XTS impl +- crypto: add support for gcrypt's native XTS impl +- tests: benchmark crypto with fixed data size, not time period +- tests: allow filtering crypto cipher benchmark tests + +* Tue Jul 20 2021 Chen Qun +- target/i386: Introduce Denverton CPU model +- target/i386: Add Snowridge-v2 (no MPX) CPU model +- i386: Add CPUID bit for CLZERO and XSAVEERPTR + +* Mon Jul 19 2021 Chen Qun +- x86: Intel AVX512_BF16 feature enabling +- i386: Add MSR feature bit for MDS-NO +- i386: Add macro for stibp +- i386: Add new CPU model Cooperlake +- target/i386: Add new bit definitions of MSR_IA32_ARCH_CAPABILITIES +- target/i386: Add missed security features to Cooperlake CPU model +- target/i386: add PSCHANGE_NO bit for the ARCH_CAPABILITIES MSR +- target/i386: Export TAA_NO bit to guests + +* Mon Jul 19 2021 Chen Qun +- hw/net/rocker_of_dpa: fix double free bug of rocker device + +* Mon Jun 28 2021 imxcc +- spec: add gcc buildrequire + +* Mon Jun 21 2021 Chen Qun +- ide: ahci: add check to avoid null dereference (CVE-2019-12067) +- hw/intc/arm_gic: Fix interrupt ID in GICD_SGIR register +- usb: limit combined packets to 1 MiB (CVE-2021-3527) + +* Tue Jun 15 2021 Chen Qun +- vhost-user-gpu: fix resource leak in 'vg_resource_create_2d' (CVE-2021-3544) +- vhost-user-gpu: fix memory leak in vg_resource_attach_backing (CVE-2021-3544) +- vhost-user-gpu: fix memory leak while calling 'vg_resource_unref' (CVE-2021-3544) +- vhost-user-gpu: fix memory leak in 'virgl_cmd_resource_unref' (CVE-2021-3544) +- vhost-user-gpu: fix memory leak in 'virgl_resource_attach_backing' (CVE-2021-3544) +- vhost-user-gpu: fix memory disclosure in virgl_cmd_get_capset_info (CVE-2021-3545) +- vhost-user-gpu: fix OOB write in 
'virgl_cmd_get_capset' (CVE-2021-3546) + +* Fri May 28 2021 Chen Qun +- blockjob: Fix crash with IOthread when block commit after snapshot + +* Thu May 20 2021 zhouli57 +- arm/cpu: Fixed function undefined error at compile time under arm + +* Wed May 19 2021 Ming Yang +- add strip for block-iscsi.so, block-rbd.so and block-ssh.so. + +* Wed May 19 2021 zhouli57 +- util/cacheinfo: fix crash when compiling with uClibc + +* Fri Mar 26 2021 Chen Qun +- hw/pci-host: add pci-intack write method +- pci-host: add pcie-msi read method +- vfio: add quirk device write method +- prep: add ppc-parity write method +- nvram: add nrf51_soc flash read method +- spapr_pci: add spapr msi read method +- tz-ppc: add dummy read/write methods +- imx7-ccm: add digprog mmio write method + +* Thu Mar 18 2021 Chen Qun +- block: Add sanity check when setting retry parameters + +* Wed Mar 17 2021 Huawei Technologies Co., Ltd +- qemu.spec: enable strip for qemu-block-rbd.so and qemu-block-ssh.so + +* Fri Mar 12 2021 Chen Qun +- net: vmxnet3: validate configuration values during activate (CVE-2021-20203) + +* Fri Mar 12 2021 Chen Qun +- migration: fix memory leak in qmp_migrate_set_parameters +- migration/tls: fix inverted semantics in multifd_channel_connect +- migration/tls: add error handling in multifd_tls_handshake_thread + +* Thu Mar 11 2021 Huawei Technologies Co., Ltd +- qemu.spec: add iscsi rpm package requirement + +* Wed Mar 10 2021 Huawei Technologies Co., Ltd +- qemu.spec: make iscsi rpm package + +* Tue Mar 02 2021 Huawei Technologies Co., Ltd +- qemu.spec: Add --enable-zstd compile parameter + +* Fri Feb 26 2021 Huawei Technologies Co., Ltd +- block-backend: Stop retrying when draining + +* Fri Feb 26 2021 Huawei Technologies Co., Ltd +- ide:atapi: check io_buffer_index in ide_atapi_cmd_reply_end + +* Fri Feb 19 2021 Huawei Technologies Co., Ltd +- ati: use vga_read_byte in ati_cursor_define +- sd: sdhci: assert data_count is within fifo_buffer +- msix: add valid.accepts methods to 
check address + +* Thu Feb 04 2021 Huawei Technologies Co., Ltd +- migration: Add multi-thread compress method +- migration: Refactoring multi-thread compress migration +- migration: Add multi-thread compress ops +- migration: Add zstd support in multi-thread compression +- migration: Add compress_level sanity check +- doc: Update multi-thread compression doc +- configure: Enable test and libs for zstd + +* Sat Jan 30 2021 Huawei Technologies Co., Ltd +- scsi-bus: Refactor the code that retries requests +- scsi-disk: Add support for retry on errors +- qapi/block-core: Add retry option for error action +- block-backend: Introduce retry timer +- block-backend: Add device specific retry callback +- block-backend: Enable retry action on errors +- block-backend: Add timeout support for retry +- block: Add error retry param setting +- virtio-blk: Refactor the code that processes queued requests +- virtio-blk: On restart, process queued requests in the proper context +- virtio_blk: Add support for retry on errors + +* Mon Jan 18 2021 Huawei Technologies Co., Ltd +- feature: enable spice protocol + +* Mon Jan 18 2021 Huawei Technologies Co., Ltd +- reorder changelog in desceding order + +* Fri Jan 15 2021 Huawei Technologies Co., Ltd +- memory: clamp cached translation in case it points to an MMIO region + +* Fri Dec 25 2020 Huawei Technologies Co., Ltd +- add qemu-block-rbd package +- add qemu-block-ssh package + +* Fri Dec 11 2020 Huawei Technologies Co., Ltd +- hostmem: Fix up free host_nodes list right after visited + +* Fri Dec 11 2020 Huawei Technologies Co., Ltd +- slirp: check pkt_len before reading protocol header for fixing CVE-2020-29129 and CVE-2020-29130 + +* Wed Dec 9 2020 Huawei Technologies Co., Ltd +- target/arm: Fix write redundant values to kvm + +* Wed Dec 2 2020 Huawei Technologies Co., Ltd +- migration/tls: save hostname into MigrationState +- migration/tls: extract migration_tls_client_create for common-use +- migration/tls: add tls_hostname into 
MultiFDSendParams +- migration/tls: extract cleanup function for common-use +- migration/tls: add support for multifd tls-handshake +- migration/tls: add trace points for multifd-tls +- qemu-file: Don't do IO after shutdown +- multifd: Make sure that we don't do any IO after an error +- migration: Don't send data if we have stopped +- migration: Create migration_is_running() +- migration: fix COLO broken caused by a previous commit +- migration/multifd: fix hangup with TLS-Multifd due to blocking handshake +- multifd/tls: fix memoryleak of the QIOChannelSocket object when cancelling migration + +* Wed Nov 18 2020 Huawei Technologies Co., Ltd +- ati: check x y display parameter values + +* Fri Nov 13 2020 Huawei Technologies Co., Ltd +- json: Fix a memleak in parse_pair() + +* Wed Nov 11 2020 Huawei Technologies Co., Ltd +- hw: usb: hcd-ohci: check for processed TD before retire +- hw: ehci: check return value of 'usb_packet_map' +- hw: usb: hcd-ohci: check len and frame_number variables +- hw/net/e1000e: advance desc_offset in case of null descriptor + +* Fri Oct 30 2020 Huawei Technologies Co., Ltd +- migration/dirtyrate: setup up query-dirtyrate framwork +- migration/dirtyrate: add DirtyRateStatus to denote calculation status +- migration/dirtyrate: Add RamblockDirtyInfo to store sampled page info +- migration/dirtyrate: Add dirtyrate statistics series functions +- migration/dirtyrate: move RAMBLOCK_FOREACH_MIGRATABLE into ram.h +- migration/dirtyrate: Record hash results for each sampled page +- migration/dirtyrate: Compare page hash results for recorded sampled page +- migration/dirtyrate: skip sampling ramblock with size below MIN_RAMBLOCK_SIZE +- migration/dirtyrate: Implement set_sample_page_period() and is_sample_period_valid() +- migration/dirtyrate: Implement calculate_dirtyrate() function +- migration/dirtyrate: Implement qmp_cal_dirty_rate()/qmp_get_dirty_rate() function +- migration/dirtyrate: Add trace_calls to make it easier to debug +- 
migration/dirtyrate: record start_time and calc_time while at the measuring state +- migration/dirtyrate: present dirty rate only when querying the rate has completed +- migration/dirtyrate: simplify includes in dirtyrate.c + +* Fri Oct 30 2020 Huawei Technologies Co., Ltd +- elf2dmp: Fix memory leak on main() error paths +- io: Don't use '#' flag of printf format +- hw/display/omap_lcdc: Fix potential NULL pointer dereference +- hw/display/exynos4210_fimd: Fix potential NULL pointer dereference +- block/vvfat: Fix bad printf format specifiers +- block: Remove unused include +- ssi: Fix bad printf format specifiers +- net/l2tpv3: Remove redundant check in net_init_l2tpv3() + +* Thu Oct 29 2020 Huawei Technologies Co., Ltd +- Bugfix: hw/acpi: Use max_cpus instead of cpus when build PPTT table + +* Wed Oct 21 2020 Huawei Technologies Co., Ltd +- net: remove an assert call in eth_get_gso_type + +* Wed Oct 14 2020 Prasad J Pandit +- pci: check bus pointer before dereference +- hw/ide: check null block before _cancel_dma_sync + +* Mon Sep 28 2020 Huawei Technologies Co., Ltd +- sm501: Replace hand written implementation with pixman where possible +- sm501: Clean up local variables in sm501_2d_operation +- sm501: Use BIT(x) macro to shorten constant +- sm501: Shorten long variable names in sm501_2d_operation +- sm501: Convert printf + abort to qemu_log_mask +- hw/net/net_tx_pkt: fix assertion failure in net_tx_pkt_add_raw_fragment +- hw/net/xgmac: Fix buffer overflow in xgmac_enet_send() + +* Fri Sep 18 2020 Huawei Technologies Co., Ltd +- hw-sd-sdhci-Fix-DMA-Transfer-Block-Size-field.patch +- hw-xhci-check-return-value-of-usb_packet_map.patch + +* Fri Sep 11 2020 Huawei Technologies Co., Ltd +- slirp/src/ip6_input.c: fix out-of-bounds read information vulnerability + +* Tue Sep 08 2020 Huawei Technologies Co., Ltd +- target/arm: ignore evtstrm and cpuid CPU features + +* Fri Aug 21 2020 Huawei Technologies Co., Ltd +- hw/usb/core.c: fix buffer overflow in do_token_setup 
function + +* Wed Aug 19 2020 Huawei Technologies Co., Ltd +- target-arm-convert-isar-regs-to-array.patch +- target-arm-parse-cpu-feature-related-options.patch +- target-arm-register-CPU-features-for-property.patch +- target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch +- target-arm-introduce-CPU-feature-dependency-mechanis.patch +- target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch +- target-arm-Add-CPU-features-to-query-cpu-model-expan.patch +- target-arm-Update-ID-fields.patch +- target-arm-Add-more-CPU-features.patch + +* Wed Aug 19 2020 Huawei Technologies Co., Ltd +- target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch +- target-arm-Add-ID_AA64MMFR2_EL1.patch +- target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch +- target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch +- target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch +- target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch +- target-arm-Stop-assuming-DBGDIDR-always-exists.patch +- target-arm-Move-DBGDIDR-into-ARMISARegisters.patch +- target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch +- target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch +- target-arm-Read-debug-related-ID-registers-from-KVM.patch +- target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch +- target-arm-monitor-query-cpu-model-expansion-crashed.patch + +* Tue Aug 18 2020 Huawei Technologies Co., Ltd +- hw/acpi/aml-build.c: build smt processor structure to support smt topology + +* Thu Aug 13 2020 Huawei Technologies Co., Ltd +-target/arm: Aarch64 support vtpm + +* Wed Aug 12 2020 Huawei Technologies Co., Ltd +- backport upstream patch to support SHPCHotplug in arm + +* Thu Aug 6 2020 Huawei Technologies Co., Ltd +- es1370: check total frame count against current frame +- exec: set map length to zero when returning NULL +- ati-vga: check mm_index before recursive call (CVE-2020-13800) +- megasas: use unsigned type for reply_queue_head and check index +- megasas: avoid NULL pointer dereference +- 
megasas: use unsigned type for positive numeric fields +- hw/scsi/megasas: Fix possible out-of-bounds array access in tracepoints + +* Thu Aug 6 2020 Huawei Technologies Co., Ltd +- tests: Disable filemonitor testcase + +* Sat Jun 20 2020 Huawei Technologies Co., Ltd +- target/arm: Fix PAuth sbox functions +- fix two patches' format which can cause git am failed + +* Fri May 29 2020 Huawei Technologies Co., Ltd +- target/arm: Add the kvm_adjvtime vcpu property for Cortex-A72 + +* Wed May 27 2020 Huawei Technologies Co., Ltd. +- Revert: "vtimer: compat cross version migration from v4.0.1" +- ARM64: record vtimer tick when cpu is stopped +- hw/arm/virt: add missing compat for kvm-no-adjvtime +- migration: Compat virtual timer adjust for v4.0.1 and v4.1.0 +- vtimer: Drop vtimer virtual timer adjust + +* Fri May 22 2020 Huawei Technologies Co., Ltd. +- ip_reass: Fix use after free +- bt: use size_t type for length parameters instead of int +- log: Add some logs on VM runtime path + +* Fri May 15 2020 Huawei Technologies Co., Ltd. +- ide: Fix incorrect handling of some PRDTs in ide_dma_cb() +- ati-vga: Fix checks in ati_2d_blt() to avoid crash +- slirp: tftp: restrict relative path access + +* Tue May 12 2020 Huawei Technologies Co., Ltd. +- arm/virt: Support CPU cold plug + +* Sat May 9 2020 Huawei Technologies Co., Ltd. +- migration/ram: do error_free after migrate_set_error to avoid memleaks. +- migration/ram: fix memleaks in multifd_new_send_channel_async. +- migration/rdma: fix a memleak on error path in rdma_start_incoming_migration. + +* Fri May 8 2020 Huawei Technologies Co., Ltd. +- vtimer: compat cross version migration from v4.0.1 + +* Fri Apr 24 2020 Huawei Technologies Co., Ltd. +- migration: backport migration patches from upstream + +* Fri Apr 24 2020 Huawei Technologies Co., Ltd. +- arm/virt: Add CPU hotplug support + +* Wed Apr 22 2020 Huawei Technologies Co., Ltd. 
+- backport patch to enable arm/virt memory hotplug + +* Wed Apr 22 2020 Huawei Technologies Co., Ltd. +- backport patch to enable target/arm/kvm Adjust virtual time + +* Fri Apr 17 2020 Huawei Technologies Co., Ltd. +- backport patch bundles from qemu stable v4.1.1 + +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- aio-wait: delegate polling of main AioContext if BQL not held +- async: use explicit memory barriers + +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. - pcie: Add pcie-root-port fast plug/unplug feature -- pcie: Compat with devices which do not support Link Width +- pcie: Compat with devices which do not support Link Width, such as ioh3420 -* Tue Mar 17 2020 Huawei Technologies Co., Ltd. -- Put linuxboot_dma.bin and pvh.bin in x86 package +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- qcow2-bitmap: Fix uint64_t left-shift overflow -* Mon Mar 16 2020 backport some bug fix patches from upstream -- Patch from number 0040 to 0049 are picked from stable-4.1.1 +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- COLO-compare: Fix incorrect `if` logic -* Mon Mar 16 2020 Huawei Technologies Co., Ltd. -- moniter: fix memleak in monitor_fdset_dup_fd_find_remove +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- block/backup: fix max_transfer handling for copy_range +- block/backup: fix backup_cow_with_offload for last cluster +- qcow2: Limit total allocation range to INT_MAX +- mirror: Do not dereference invalid pointers + +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- file-posix: Handle undetectable alignment + +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- vhost: Fix memory region section comparison +- memory: Provide an equality function for MemoryRegionSections +- memory: Align MemoryRegionSections fields + +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. 
- block/iscsi: use MIN() between mx_sb_len and sb_len_wr +- monitor: fix memleak in monitor_fdset_dup_fd_find_remove -* Wed Mar 11 2020 backport from qemu upstream -- tcp_emu: Fix oob access -- slirp: use correct size while emulating IRC commands -- slirp: use correct size while emulating commands +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. - tcp_emu: fix unsafe snprintf() usages - -* Mon Mar 9 2020 backport from qemu upstream +- util: add slirp_fmt() helpers +- slirp: use correct size while emulating commands +- slirp: use correct size while emulating IRC commands +- tcp_emu: Fix oob access - iscsi: Cap block count from GET LBA STATUS (CVE-2020-1711) -* Thu Feb 6 2020 Huawei Technologies Co., Ltd. -- spec: remove fno-inline option for configure - -* Thu Jan 16 2020 Huawei Technologies Co., Ltd. +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- 9pfs: local: Fix possible memory leak in local_link() +- scsi-disk: define props in scsi_block_disk to avoid memleaks +- arm/translate-a64: fix uninitialized variable warning +- block: fix memleaks in bdrv_refresh_filename +- vnc: fix memory leak when vnc disconnect - block: fix memleaks in bdrv_refresh_filename -* Mon Jan 13 2020 Huawei Technologies Co., Ltd. -- 9pfs: Fix a possible memory leak in local_link -- scsi-disk: disk define props in scsi_block to avoid memleaks -- arm/translate-a64: fix uninitialized variable warning -- nbd: fix uninitialized variable warning -- xhci: Fix memory leak in xhci_kick_epctx when poweroff +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- linux headers: update against "KVM/ARM: Fix >256 vcpus" +- intc/arm_gic: Support IRQ injection for more than 256 vcpus +- ARM: KVM: Check KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 for smp_cpus > 256 + +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. 
+- vnc: fix memory leak when vnc disconnect -* Mon Jan 6 2020 backport from qemu upstream -- linux headers: update against "KVM/ARM: Fix >256 vcp -- intc/arm_gic: Support IRQ injection for more than 256 vpus -- ARM: KVM: Check KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 for smp_cpus > 256 +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- pcie: disable the PCI_EXP_LINKSTA_DLLA cap for pcie-root-port by default -* Thu Dec 12 2019 backport from qemu upstream v4.0.1 release -- tpm: Exit in reset when backend indicates failure -- tpm_emulator: Translate TPM error codes to strings +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- cpu: add Kunpeng-920 cpu support +- cpu: parse +/- feature to avoid failure +- cpu: add Cortex-A72 processor kvm target support -* Thu Oct 17 2019 backport from qemu upstream -- vnc-fix-memory-leak-when-vnc-disconnect.patch +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- vhost-user-scsi: prevent using uninitialized vqs -* Mon Sep 9 2019 backport from qemu upstream -- ehci-fix-queue-dev-null-ptr-dereference.patch -- memory-unref-the-memory-region-in-simplify-flatview.patch -- util-async-hold-AioContext-ref-to-prevent-use-after-.patch -- vhost-user-scsi-prevent-using-uninitialized-vqs.patch +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- util/async: hold AioContext ref to prevent use-after-free -* Fri Aug 30 2019 Huawei Technologies Co., Ltd. +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. - xhci: Fix memory leak in xhci_address_slot - xhci: Fix memory leak in xhci_kick_epctx +- ehci: fix queue->dev null ptr dereference -* Wed Aug 7 2019 Huawei Technologies Co., Ltd. +* Thu Apr 16 2020 Huawei Technologies Co., Ltd. +- tests/bios-tables-test: disable this testcase - hw/arm/virt: Introduce cpu topology support - hw/arm64: add vcpu cache info support -* Tue Aug 6 2019 Huawei Technologies Co., Ltd. -- Update release version to 4.0.0-2 - -* Mon Aug 5 2019 Huawei Technologies Co., Ltd. -- enable make check +* Wed Apr 15 2020 Huawei Technologies Co., Ltd. 
- smbios: Add missing member of type 4 for smbios 3.0 -* Mon Aug 5 2019 fix CVE-2019-13164 -- qemu-bridge-helper: restrict interface name to IFNAMSIZ -- qemu-bridge-helper: move repeating code in parse_acl_file +* Wed Apr 15 2020 Huawei Technologies Co., Ltd. +- bios-tables-test: prepare to change ARM virt ACPI DSDT +- arm64: Add the cpufreq device to show cpufreq info to guest -* Tue Jul 30 2019 Huawei Technologies Co., Ltd. - qcow2: fix memory leak in qcow2_read_extensions -- hw/arm: expose host CPU frequency info to guest -* Fri Jul 26 2019 Huawei Technologies Co., Ltd. -- vhost: cancel migration when vhost-user restarted +* Wed Apr 15 2020 Huawei Technologies Co., Ltd. +- pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff - pl031: support rtc-timer property for pl031 -- pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff -- ARM64: record vtimer tick when cpu is stopped +- vhost: cancel migration when vhost-user restarted -* Tue Jul 23 2019 openEuler Buildteam - version-release +* Mon Apr 13 2020 openEuler Buildteam - version-release - Package init - diff --git a/qga-Plug-unlikely-memory-leak-in-guest-set-memory-bl.patch b/qga-Plug-unlikely-memory-leak-in-guest-set-memory-bl.patch new file mode 100644 index 0000000000000000000000000000000000000000..a901a500181bb9a36f9bd307d8bdee5929b6144f --- /dev/null +++ b/qga-Plug-unlikely-memory-leak-in-guest-set-memory-bl.patch @@ -0,0 +1,40 @@ +From 1580682eafb489eaf417456778267662629cf696 Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Tue, 30 Jun 2020 11:03:33 +0200 +Subject: [PATCH 05/11] qga: Plug unlikely memory leak in + guest-set-memory-blocks + +transfer_memory_block() leaks an Error object when reading file +/sys/devices/system/memory/memory/state fails with errno other +than ENOENT, and @sys2memblk is false, i.e. when the state file exists +but cannot be read (seems quite unlikely), and this is +guest-set-memory-blocks, not guest-get-memory-blocks. + +Plug the leak. 
+ +Fixes: bd240fca42d5f072fb758a71720d9de9990ac553 +Cc: Michael Roth +Cc: Hailiang Zhang +Signed-off-by: Markus Armbruster +Reviewed-by: zhanghailiang +Message-Id: <20200630090351.1247703-9-armbru@redhat.com> +Signed-off-by: BiaoXiang Ye +--- + qga/commands-posix.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index dfc05f5b..c318cee7 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -2420,6 +2420,7 @@ static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk, + if (sys2memblk) { + error_propagate(errp, local_err); + } else { ++ error_free(local_err); + result->response = + GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED; + } +-- +2.27.0.dirty + diff --git a/qga-commands-posix-fix-use-after-free-of-local_err.patch b/qga-commands-posix-fix-use-after-free-of-local_err.patch new file mode 100644 index 0000000000000000000000000000000000000000..9628d0c59445c9d29ddaa39e6fb271fe73a5c274 --- /dev/null +++ b/qga-commands-posix-fix-use-after-free-of-local_err.patch @@ -0,0 +1,49 @@ +From 15847279f29b0bd67b95daefff395cab8fad80d3 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 24 Mar 2020 18:36:30 +0300 +Subject: [PATCH 4/5] qga/commands-posix: fix use after free of local_err + +local_err is used several times in guest_suspend(). Setting non-NULL +local_err will crash, so let's zero it after freeing. Also fix possible +leak of local_err in final if(). 
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20200324153630.11882-7-vsementsov@virtuozzo.com> +Reviewed-by: Richard Henderson +Signed-off-by: Markus Armbruster +Signed-off-by: Zhenyu Ye +--- + qga/commands-posix.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index dfc05f5b..66164e6c 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1760,6 +1760,7 @@ static void guest_suspend(SuspendMode mode, Error **errp) + } + + error_free(local_err); ++ local_err = NULL; + + if (pmutils_supports_mode(mode, &local_err)) { + mode_supported = true; +@@ -1771,6 +1772,7 @@ static void guest_suspend(SuspendMode mode, Error **errp) + } + + error_free(local_err); ++ local_err = NULL; + + if (linux_sys_state_supports_mode(mode, &local_err)) { + mode_supported = true; +@@ -1778,6 +1780,7 @@ static void guest_suspend(SuspendMode mode, Error **errp) + } + + if (!mode_supported) { ++ error_free(local_err); + error_setg(errp, + "the requested suspend mode is not supported by the guest"); + } else { +-- +2.22.0.windows.1 + diff --git a/qga-fix-assert-regression-on-guest-shutdown.patch b/qga-fix-assert-regression-on-guest-shutdown.patch new file mode 100644 index 0000000000000000000000000000000000000000..c5f1e1069b5097ff1adf2328bea6a25e9483cda1 --- /dev/null +++ b/qga-fix-assert-regression-on-guest-shutdown.patch @@ -0,0 +1,47 @@ +From aeccff89333c565c7a894f99c17c0044d7d43be2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 4 Jun 2020 11:44:25 +0200 +Subject: [PATCH 02/11] qga: fix assert regression on guest-shutdown +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Since commit 781f2b3d1e ("qga: process_event() simplification"), +send_response() is called unconditionally, but will assert when "rsp" is +NULL. This may happen with QCO_NO_SUCCESS_RESP commands, such as +"guest-shutdown". 
+ +Fixes: 781f2b3d1e5ef389b44016a897fd55e7a780bf35 +Cc: Michael Roth +Reported-by: Christian Ehrhardt +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Christian Ehrhardt +Tested-by: Christian Ehrhardt +Cc: qemu-stable@nongnu.org +Signed-off-by: Michael Roth +Signed-off-by: BiaoXiang Ye +--- + qga/main.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/qga/main.c b/qga/main.c +index c35c2a21..12fa463f 100644 +--- a/qga/main.c ++++ b/qga/main.c +@@ -529,7 +529,11 @@ static int send_response(GAState *s, const QDict *rsp) + QString *payload_qstr, *response_qstr; + GIOStatus status; + +- g_assert(rsp && s->channel); ++ g_assert(s->channel); ++ ++ if (!rsp) { ++ return 0; ++ } + + payload_qstr = qobject_to_json(QOBJECT(rsp)); + if (!payload_qstr) { +-- +2.27.0.dirty + diff --git a/qmp-fix-leak-on-callbacks-that-return-both-value-and.patch b/qmp-fix-leak-on-callbacks-that-return-both-value-and.patch new file mode 100644 index 0000000000000000000000000000000000000000..1ceb1e70b84f1e1a9a3f785ff2d4d55b697a7cb4 --- /dev/null +++ b/qmp-fix-leak-on-callbacks-that-return-both-value-and.patch @@ -0,0 +1,47 @@ +From 1f1949368d4ac7a18973aa83a074daf01daf97ad Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 25 Mar 2020 19:47:22 +0100 +Subject: [PATCH 3/5] qmp: fix leak on callbacks that return both value and + error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Direct leak of 4120 byte(s) in 1 object(s) allocated from: + #0 0x7fa114931887 in __interceptor_calloc (/lib64/libasan.so.6+0xb0887) + #1 0x7fa1144ad8f0 in g_malloc0 (/lib64/libglib-2.0.so.0+0x588f0) + #2 0x561e3c9c8897 in qmp_object_add /home/elmarco/src/qemu/qom/qom-qmp-cmds.c:291 + #3 0x561e3cf48736 in qmp_dispatch /home/elmarco/src/qemu/qapi/qmp-dispatch.c:155 + #4 0x561e3c8efb36 in monitor_qmp_dispatch /home/elmarco/src/qemu/monitor/qmp.c:145 + #5 0x561e3c8f09ed in 
monitor_qmp_bh_dispatcher /home/elmarco/src/qemu/monitor/qmp.c:234 + #6 0x561e3d08c993 in aio_bh_call /home/elmarco/src/qemu/util/async.c:136 + #7 0x561e3d08d0a5 in aio_bh_poll /home/elmarco/src/qemu/util/async.c:164 + #8 0x561e3d0a535a in aio_dispatch /home/elmarco/src/qemu/util/aio-posix.c:380 + #9 0x561e3d08e3ca in aio_ctx_dispatch /home/elmarco/src/qemu/util/async.c:298 + #10 0x7fa1144a776e in g_main_context_dispatch (/lib64/libglib-2.0.so.0+0x5276e) + +Signed-off-by: Marc-André Lureau +Message-Id: <20200325184723.2029630-3-marcandre.lureau@redhat.com> +Reviewed-by: Markus Armbruster +Signed-off-by: Paolo Bonzini +Signed-off-by: Zhenyu Ye +--- + qapi/qmp-dispatch.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index 6dfdad57..a635abb9 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -189,6 +189,8 @@ QDict *qmp_dispatch(QmpCommandList *cmds, QObject *request, + + ret = do_qmp_dispatch(cmds, request, allow_oob, &err); + if (err) { ++ /* or assert(!ret) after reviewing all handlers: */ ++ qobject_unref(ret); + rsp = qmp_error_response(err); + } else if (ret) { + rsp = qdict_new(); +-- +2.22.0.windows.1 + diff --git a/qxl-check-release-info-object.patch b/qxl-check-release-info-object.patch deleted file mode 100644 index aeddbe467c34aa83a3c7b8bca6b98b44abf546a6..0000000000000000000000000000000000000000 --- a/qxl-check-release-info-object.patch +++ /dev/null @@ -1,36 +0,0 @@ -From cbed4e0108ca1403f1f47cde292330b87a0d8bf2 Mon Sep 17 00:00:00 2001 -From: Prasad J Pandit -Date: Thu, 25 Apr 2019 12:05:34 +0530 -Subject: [PATCH] qxl: check release info object - -When releasing spice resources in release_resource() routine, -if release info object 'ext.info' is null, it leads to null -pointer dereference. Add check to avoid it. 
- -(This is cherry-pick d52680fc932efb8a2f334cc6993e705ed1e31e99) - -Reported-by: Bugs SysSec -Signed-off-by: Prasad J Pandit -Message-id: 20190425063534.32747-1-ppandit@redhat.com -Signed-off-by: Gerd Hoffmann ---- - hw/display/qxl.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/display/qxl.c b/hw/display/qxl.c -index c8ce578..632923a 100644 ---- a/hw/display/qxl.c -+++ b/hw/display/qxl.c -@@ -777,6 +777,9 @@ static void interface_release_resource(QXLInstance *sin, - QXLReleaseRing *ring; - uint64_t *item, id; - -+ if (!ext.info) { -+ return; -+ } - if (ext.group_id == MEMSLOT_GROUP_HOST) { - /* host group -> vga mode update request */ - QXLCommandExt *cmdext = (void *)(intptr_t)(ext.info->id); --- -1.8.3.1 - diff --git a/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch b/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch new file mode 100644 index 0000000000000000000000000000000000000000..00e672662ddd5d848fc031967a0efdcf9dc4432b --- /dev/null +++ b/roms-Makefile.edk2-don-t-pull-in-submodules-when-bui.patch @@ -0,0 +1,54 @@ +From fc5afb1a9230fe21d76bcef527b0d3cee90a2cd3 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 12 Sep 2019 18:12:02 -0500 +Subject: [PATCH] roms/Makefile.edk2: don't pull in submodules when building + from tarball +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently the `make efi` target pulls submodules nested under the +roms/edk2 submodule as dependencies. However, when we attempt to build +from a tarball this fails since we are no longer in a git tree. + +A preceding patch will pre-populate these submodules in the tarball, +so assume this build dependency is only needed when building from a +git tree. 
+ +Cc: Laszlo Ersek +Cc: Bruce Rogers +Cc: qemu-stable@nongnu.org # v4.1.0 +Reported-by: Bruce Rogers +Reviewed-by: Laszlo Ersek +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Philippe Mathieu-Daudé +Signed-off-by: Michael Roth +Message-Id: <20190912231202.12327-3-mdroth@linux.vnet.ibm.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit f3e330e3c319160ac04954399b5a10afc965098c) +Signed-off-by: Michael Roth +--- + roms/Makefile.edk2 | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/roms/Makefile.edk2 b/roms/Makefile.edk2 +index c2f2ff59d5..33a074d3a4 100644 +--- a/roms/Makefile.edk2 ++++ b/roms/Makefile.edk2 +@@ -46,8 +46,13 @@ all: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd.bz2) \ + # files. + .INTERMEDIATE: $(foreach flashdev,$(flashdevs),../pc-bios/edk2-$(flashdev).fd) + ++# Fetch edk2 submodule's submodules. If it is not in a git tree, assume ++# we're building from a tarball and that they've already been fetched by ++# make-release/tarball scripts. + submodules: +- cd edk2 && git submodule update --init --force ++ if test -d edk2/.git; then \ ++ cd edk2 && git submodule update --init --force; \ ++ fi + + # See notes on the ".NOTPARALLEL" target and the "+" indicator in + # "tests/uefi-test-tools/Makefile". +-- +2.23.0 diff --git a/scsi-bus-Refactor-the-code-that-retries-requests.patch b/scsi-bus-Refactor-the-code-that-retries-requests.patch new file mode 100644 index 0000000000000000000000000000000000000000..eae42b854e2ba7818ff3c5e9812c7e3ed7f94ac9 --- /dev/null +++ b/scsi-bus-Refactor-the-code-that-retries-requests.patch @@ -0,0 +1,68 @@ +From eb55d7c4f6e0adae2aab8bd750dccf9cd7a8c784 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:54 +0800 +Subject: [PATCH] scsi-bus: Refactor the code that retries requests + +Move the code that retries requests from scsi_dma_restart_bh() to its own, +non-static, function. 
This will allow us to call it from the +retry_request_cb() of scsi-disk in a future patch. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + hw/scsi/scsi-bus.c | 16 +++++++++++----- + include/hw/scsi/scsi.h | 1 + + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index fdc3a0e4e0..9dc09b5f3e 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -99,14 +99,10 @@ void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, + qbus_set_bus_hotplug_handler(BUS(bus), &error_abort); + } + +-static void scsi_dma_restart_bh(void *opaque) ++void scsi_retry_requests(SCSIDevice *s) + { +- SCSIDevice *s = opaque; + SCSIRequest *req, *next; + +- qemu_bh_delete(s->bh); +- s->bh = NULL; +- + aio_context_acquire(blk_get_aio_context(s->conf.blk)); + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { + scsi_req_ref(req); +@@ -128,6 +124,16 @@ static void scsi_dma_restart_bh(void *opaque) + aio_context_release(blk_get_aio_context(s->conf.blk)); + } + ++static void scsi_dma_restart_bh(void *opaque) ++{ ++ SCSIDevice *s = opaque; ++ ++ qemu_bh_delete(s->bh); ++ s->bh = NULL; ++ ++ scsi_retry_requests(s); ++} ++ + void scsi_req_retry(SCSIRequest *req) + { + /* No need to save a reference, because scsi_dma_restart_bh just +diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h +index 426566a5c6..1231d30b35 100644 +--- a/include/hw/scsi/scsi.h ++++ b/include/hw/scsi/scsi.h +@@ -184,6 +184,7 @@ void scsi_req_cancel_complete(SCSIRequest *req); + void scsi_req_cancel(SCSIRequest *req); + void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier); + void scsi_req_retry(SCSIRequest *req); ++void scsi_retry_requests(SCSIDevice *s); + void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense); + void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense); + void scsi_device_report_change(SCSIDevice *dev, SCSISense sense); +-- +2.27.0 + diff --git 
a/scsi-disk-Add-support-for-retry-on-errors.patch b/scsi-disk-Add-support-for-retry-on-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..e0bd91ec86fd13c70499567df0c18f9761e06c35 --- /dev/null +++ b/scsi-disk-Add-support-for-retry-on-errors.patch @@ -0,0 +1,76 @@ +From 34f1552a6d7e05f2f2146ebc6d50deb2de7e5fd4 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:55 +0800 +Subject: [PATCH] scsi-disk: Add support for retry on errors + +Mark failed requests as to be retried and implement retry_request_cb to +handle these requests. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + hw/scsi/scsi-disk.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index cd90cd780e..93fdd913fe 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -184,6 +184,8 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) + + static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + { ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); ++ + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + return true; +@@ -193,6 +195,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return scsi_handle_rw_error(r, -ret, acct_failed); + } + ++ blk_error_retry_reset_timeout(s->qdev.conf.blk); + return false; + } + +@@ -480,6 +483,10 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) + } + } + ++ if (action == BLOCK_ERROR_ACTION_RETRY) { ++ scsi_req_retry(&r->req); ++ } ++ + blk_error_action(s->qdev.conf.blk, action, is_read, error); + if (action == BLOCK_ERROR_ACTION_IGNORE) { + scsi_req_complete(&r->req, 0); +@@ -2252,6 +2259,13 @@ static void scsi_disk_resize_cb(void *opaque) + } + } + ++static void scsi_disk_retry_request(void *opaque) ++{ ++ SCSIDiskState *s = opaque; ++ ++ scsi_retry_requests(&s->qdev); ++} ++ + static void 
scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) + { + SCSIDiskState *s = opaque; +@@ -2300,10 +2314,12 @@ static const BlockDevOps scsi_disk_removable_block_ops = { + .is_medium_locked = scsi_cd_is_medium_locked, + + .resize_cb = scsi_disk_resize_cb, ++ .retry_request_cb = scsi_disk_retry_request, + }; + + static const BlockDevOps scsi_disk_block_ops = { + .resize_cb = scsi_disk_resize_cb, ++ .retry_request_cb = scsi_disk_retry_request, + }; + + static void scsi_disk_unit_attention_reported(SCSIDevice *dev) +-- +2.27.0 + diff --git a/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch b/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch new file mode 100644 index 0000000000000000000000000000000000000000..5d20a9f009c9bd52f9eef578344c5b0012ee8942 --- /dev/null +++ b/scsi-lsi-exit-infinite-loop-while-executing-script-C.patch @@ -0,0 +1,104 @@ +From 051c9b3cbcb4beb42a6ed017c2146ec3e7a754fb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 14 Aug 2019 17:35:21 +0530 +Subject: [PATCH] scsi: lsi: exit infinite loop while executing script + (CVE-2019-12068) + +When executing script in lsi_execute_script(), the LSI scsi adapter +emulator advances 's->dsp' index to read next opcode. This can lead +to an infinite loop if the next opcode is empty. Move the existing +loop exit after 10k iterations so that it covers no-op opcodes as +well. + +Reported-by: Bugs SysSec +Signed-off-by: Paolo Bonzini +Signed-off-by: Prasad J Pandit +Signed-off-by: Paolo Bonzini +(cherry picked from commit de594e47659029316bbf9391efb79da0a1a08e08) +Signed-off-by: Michael Roth +--- + hw/scsi/lsi53c895a.c | 41 +++++++++++++++++++++++++++-------------- + 1 file changed, 27 insertions(+), 14 deletions(-) + +diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c +index 10468c1ec1..72f7b59ab5 100644 +--- a/hw/scsi/lsi53c895a.c ++++ b/hw/scsi/lsi53c895a.c +@@ -185,6 +185,9 @@ static const char *names[] = { + /* Flag set if this is a tagged command. 
*/ + #define LSI_TAG_VALID (1 << 16) + ++/* Maximum instructions to process. */ ++#define LSI_MAX_INSN 10000 ++ + typedef struct lsi_request { + SCSIRequest *req; + uint32_t tag; +@@ -1132,7 +1135,21 @@ static void lsi_execute_script(LSIState *s) + + s->istat1 |= LSI_ISTAT1_SRUN; + again: +- insn_processed++; ++ if (++insn_processed > LSI_MAX_INSN) { ++ /* Some windows drivers make the device spin waiting for a memory ++ location to change. If we have been executed a lot of code then ++ assume this is the case and force an unexpected device disconnect. ++ This is apparently sufficient to beat the drivers into submission. ++ */ ++ if (!(s->sien0 & LSI_SIST0_UDC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "lsi_scsi: inf. loop with UDC masked"); ++ } ++ lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); ++ lsi_disconnect(s); ++ trace_lsi_execute_script_stop(); ++ return; ++ } + insn = read_dword(s, s->dsp); + if (!insn) { + /* If we receive an empty opcode increment the DSP by 4 bytes +@@ -1569,19 +1586,7 @@ again: + } + } + } +- if (insn_processed > 10000 && s->waiting == LSI_NOWAIT) { +- /* Some windows drivers make the device spin waiting for a memory +- location to change. If we have been executed a lot of code then +- assume this is the case and force an unexpected device disconnect. +- This is apparently sufficient to beat the drivers into submission. +- */ +- if (!(s->sien0 & LSI_SIST0_UDC)) { +- qemu_log_mask(LOG_GUEST_ERROR, +- "lsi_scsi: inf. 
loop with UDC masked"); +- } +- lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0); +- lsi_disconnect(s); +- } else if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) { ++ if (s->istat1 & LSI_ISTAT1_SRUN && s->waiting == LSI_NOWAIT) { + if (s->dcntl & LSI_DCNTL_SSM) { + lsi_script_dma_interrupt(s, LSI_DSTAT_SSI); + } else { +@@ -1969,6 +1974,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) + case 0x2f: /* DSP[24:31] */ + s->dsp &= 0x00ffffff; + s->dsp |= val << 24; ++ /* ++ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one ++ * instruction. Is this correct? ++ */ + if ((s->dmode & LSI_DMODE_MAN) == 0 + && (s->istat1 & LSI_ISTAT1_SRUN) == 0) + lsi_execute_script(s); +@@ -1987,6 +1996,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) + break; + case 0x3b: /* DCNTL */ + s->dcntl = val & ~(LSI_DCNTL_PFF | LSI_DCNTL_STD); ++ /* ++ * FIXME: if s->waiting != LSI_NOWAIT, this will only execute one ++ * instruction. Is this correct? ++ */ + if ((val & LSI_DCNTL_STD) && (s->istat1 & LSI_ISTAT1_SRUN) == 0) + lsi_execute_script(s); + break; +-- +2.23.0 diff --git a/sd-sdhci-assert-data_count-is-within-fifo_buffer.patch b/sd-sdhci-assert-data_count-is-within-fifo_buffer.patch new file mode 100644 index 0000000000000000000000000000000000000000..e38bfaa471d280a7de334e81906747938ab57b7c --- /dev/null +++ b/sd-sdhci-assert-data_count-is-within-fifo_buffer.patch @@ -0,0 +1,65 @@ +From e8d2655821caa2b8efce429c0036a93342b8383d Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Mon, 8 Feb 2021 17:14:21 +0800 +Subject: [PATCH] sd: sdhci: assert data_count is within fifo_buffer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2020-17380 + +While doing multi block SDMA, transfer block size may exceed +the 's->fifo_buffer[s->buf_maxsz]' size. It may leave the +current element pointer 's->data_count' pointing out of bounds. 
+Leading the subsequent DMA r/w operation to OOB access issue. +Assert that 's->data_count' is within fifo_buffer. + + -> https://ruhr-uni-bochum.sciebo.de/s/NNWP2GfwzYKeKwE?path=%2Fsdhci_oob_write1 + ==1459837==ERROR: AddressSanitizer: heap-buffer-overflow + WRITE of size 54722048 at 0x61500001e280 thread T3 + #0 __interceptor_memcpy (/lib64/libasan.so.6+0x3a71d) + #1 flatview_read_continue ../exec.c:3245 + #2 flatview_read ../exec.c:3278 + #3 address_space_read_full ../exec.c:3291 + #4 address_space_rw ../exec.c:3319 + #5 dma_memory_rw_relaxed ../include/sysemu/dma.h:87 + #6 dma_memory_rw ../include/sysemu/dma.h:110 + #7 dma_memory_read ../include/sysemu/dma.h:116 + #8 sdhci_sdma_transfer_multi_blocks ../hw/sd/sdhci.c:629 + #9 sdhci_write ../hw/sd/sdhci.c:1097 + #10 memory_region_write_accessor ../softmmu/memory.c:483 + ... + +Reported-by: Ruhr-University +Suggested-by: Philippe Mathieu-Daudé +Signed-off-by: Prasad J Pandit + +patch link: https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg01175.html +Signed-off-by: Jiajie Li +--- + hw/sd/sdhci.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c +index 7b80b1d93f..e51573fe3c 100644 +--- a/hw/sd/sdhci.c ++++ b/hw/sd/sdhci.c +@@ -613,6 +613,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) + s->blkcnt--; + } + } ++ assert(s->data_count <= s->buf_maxsz && s->data_count > begin); + dma_memory_write(s->dma_as, s->sdmasysad, + &s->fifo_buffer[begin], s->data_count - begin); + s->sdmasysad += s->data_count - begin; +@@ -635,6 +636,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s) + s->data_count = block_size; + boundary_count -= block_size - begin; + } ++ assert(s->data_count <= s->buf_maxsz && s->data_count > begin); + dma_memory_read(s->dma_as, s->sdmasysad, + &s->fifo_buffer[begin], s->data_count - begin); + s->sdmasysad += s->data_count - begin; +-- +2.27.0 + diff --git a/seqlock-fix-seqlock_write_unlock_impl-function.patch 
b/seqlock-fix-seqlock_write_unlock_impl-function.patch new file mode 100644 index 0000000000000000000000000000000000000000..f7f8c7cf6e044a663886db1b89f6c8bda36e2d25 --- /dev/null +++ b/seqlock-fix-seqlock_write_unlock_impl-function.patch @@ -0,0 +1,44 @@ +From 96e00e040cd8ae23cebf183cf3a8dc9cf1f6149d Mon Sep 17 00:00:00 2001 +From: Luc Michel +Date: Wed, 29 Jan 2020 15:49:48 +0100 +Subject: [PATCH] seqlock: fix seqlock_write_unlock_impl function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The seqlock write unlock function was incorrectly calling +seqlock_write_begin() instead of seqlock_write_end(), and was releasing +the lock before incrementing the sequence. This could lead to a race +condition and a corrupted sequence number becoming odd even though the +lock is not held. + +Signed-off-by: Luc Michel +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20200129144948.2161551-1-luc.michel@greensocs.com> +Fixes: 988fcafc73 ("seqlock: add QemuLockable support", 2018-08-23) +Signed-off-by: Paolo Bonzini +--- + include/qemu/seqlock.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h +index fd408b7ec5..8b6b4ee4bb 100644 +--- a/include/qemu/seqlock.h ++++ b/include/qemu/seqlock.h +@@ -55,11 +55,11 @@ static inline void seqlock_write_lock_impl(QemuSeqLock *sl, QemuLockable *lock) + #define seqlock_write_lock(sl, lock) \ + seqlock_write_lock_impl(sl, QEMU_MAKE_LOCKABLE(lock)) + +-/* Lock out other writers and update the count. */ ++/* Update the count and release the lock. 
*/ + static inline void seqlock_write_unlock_impl(QemuSeqLock *sl, QemuLockable *lock) + { ++ seqlock_write_end(sl); + qemu_lockable_unlock(lock); +- seqlock_write_begin(sl); + } + #define seqlock_write_unlock(sl, lock) \ + seqlock_write_unlock_impl(sl, QEMU_MAKE_LOCKABLE(lock)) +-- +2.27.0 + diff --git a/slirp-check-pkt_len-before-reading-protocol-header.patch b/slirp-check-pkt_len-before-reading-protocol-header.patch new file mode 100644 index 0000000000000000000000000000000000000000..506e31e1cb809bba769e857177c4e2ad70f4293f --- /dev/null +++ b/slirp-check-pkt_len-before-reading-protocol-header.patch @@ -0,0 +1,61 @@ +From c2df0d478b2605da10363ab57825cdbc34caa680 Mon Sep 17 00:00:00 2001 +From: Alex Chen +Date: Mon, 14 Dec 2020 15:39:46 +0800 +Subject: [PATCH] slirp: check pkt_len before reading protocol header +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +While processing ARP/NCSI packets in 'arp_input' or 'ncsi_input' +routines, ensure that pkt_len is large enough to accommodate the +respective protocol headers, lest it should do an OOB access. +Add check to avoid it. 
+ +CVE-2020-29129 CVE-2020-29130 + QEMU: slirp: out-of-bounds access while processing ARP/NCSI packets + -> https://www.openwall.com/lists/oss-security/2020/11/27/1 + +Reported-by: Qiuhao Li +Signed-off-by: Prasad J Pandit +Message-Id: <20201126135706.273950-1-ppandit@redhat.com> +Reviewed-by: Marc-André Lureau +(cherry-picked from 2e1dcbc0) +Signed-off-by: Alex Chen +--- + slirp/src/ncsi.c | 4 ++++ + slirp/src/slirp.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c +index 6864b735..251c0d2b 100644 +--- a/slirp/src/ncsi.c ++++ b/slirp/src/ncsi.c +@@ -147,6 +147,10 @@ void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) + uint32_t checksum; + uint32_t *pchecksum; + ++ if (pkt_len < ETH_HLEN + sizeof(struct ncsi_pkt_hdr)) { ++ return; /* packet too short */ ++ } ++ + memset(ncsi_reply, 0, sizeof(ncsi_reply)); + + memset(reh->h_dest, 0xff, ETH_ALEN); +diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c +index b0194cb3..86b0f52d 100644 +--- a/slirp/src/slirp.c ++++ b/slirp/src/slirp.c +@@ -700,6 +700,10 @@ static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) + return; + } + ++ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { ++ return; /* packet too short */ ++ } ++ + ar_op = ntohs(ah->ar_op); + switch (ar_op) { + case ARPOP_REQUEST: +-- +2.23.0 + diff --git a/slirp-tftp-restrict-relative-path-access.patch b/slirp-tftp-restrict-relative-path-access.patch new file mode 100644 index 0000000000000000000000000000000000000000..b7f09462525437c9048b4ab249b6e4208adda4ef --- /dev/null +++ b/slirp-tftp-restrict-relative-path-access.patch @@ -0,0 +1,37 @@ +From 2fc07f4ce31a2cc9973cfb1c20897c6a4babd8b8 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Fri, 15 May 2020 16:45:28 +0800 +Subject: [PATCH] slirp: tftp: restrict relative path access + +tftp restricts relative or directory path access on Linux systems. +Apply same restrictions on Windows systems too. 
It helps to avoid +directory traversal issue. + +Fixes: https://bugs.launchpad.net/qemu/+bug/1812451Reported-by: default avatarPeter Maydell +Signed-off-by: default avatarPrasad J Pandit +Reviewed-by: Samuel Thibault's avatarSamuel Thibault +Message-Id: <20200113121431.156708-1-ppandit@redhat.com> + +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +index 093c2e06..2b4176cc 100644 +--- a/slirp/src/tftp.c ++++ b/slirp/src/tftp.c +@@ -344,8 +344,13 @@ static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, + k += 6; /* skipping octet */ + + /* do sanity checks on the filename */ +- if (!strncmp(req_fname, "../", 3) || +- req_fname[strlen(req_fname) - 1] == '/' || strstr(req_fname, "/../")) { ++ if ( ++#ifdef G_OS_WIN32 ++ strstr(req_fname, "..\\") || ++ req_fname[strlen(req_fname) - 1] == '\\' || ++#endif ++ strstr(req_fname, "../") || ++ req_fname[strlen(req_fname) -1] == '/') { + tftp_send_error(spt, 2, "Access violation", tp); + return; + } +-- +2.23.0 + diff --git a/slirp-use-correct-size-while-emulating-IRC-commands.patch b/slirp-use-correct-size-while-emulating-IRC-commands.patch index 4503688dd1502c85f7a8e0fee1e5a36bcca838b2..1b4039e1da3c0bcd08b97f4c61983ef3adac3823 100644 --- a/slirp-use-correct-size-while-emulating-IRC-commands.patch +++ b/slirp-use-correct-size-while-emulating-IRC-commands.patch @@ -1,6 +1,6 @@ -From 882149fd8401f8ff667ea384bb68008354fd110f Mon Sep 17 00:00:00 2001 +From 011880f527ff317a40769ea8673a6353e5db53ac Mon Sep 17 00:00:00 2001 From: Prasad J Pandit -Date: Wed, 11 Mar 2020 18:19:36 +0800 +Date: Tue, 14 Apr 2020 18:23:23 +0800 Subject: [PATCH] slirp: use correct size while emulating IRC commands While emulating IRC DCC commands, tcp_emu() uses 'mbuf' size @@ -13,40 +13,42 @@ Signed-off-by: default avatarPrasad J Pandit Reviewed-by: Samuel Thibault's avatarSamuel Thibault Message-Id: <20200109094228.79764-2-ppandit@redhat.com> --- - slirp/src/tcp_subr.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) + 
slirp/src/tcp_subr.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 4608942f..2053b11b 100644 +index 9c94c03a..2a15b16a 100644 --- a/slirp/src/tcp_subr.c +++ b/slirp/src/tcp_subr.c -@@ -786,7 +786,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "DCC CHAT chat %lu %u%c\n", - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), 1); -@@ -797,7 +797,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "DCC SEND %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); -@@ -808,7 +808,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "DCC MOVE %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); --- -2.21.1 (Apple Git-122.3) - +@@ -778,7 +778,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size, "DCC CHAT chat %lu %u%c\n", ++ m->m_len += snprintf(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), 1); + } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, +@@ -789,7 +790,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += +- snprintf(bptr, m->m_size, "DCC SEND %s %lu %u %u%c\n", buff, ++ snprintf(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, + (unsigned 
long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, +@@ -800,7 +802,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += +- snprintf(bptr, m->m_size, "DCC MOVE %s %lu %u %u%c\n", buff, ++ snprintf(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); + } +-- +2.23.0 diff --git a/slirp-use-correct-size-while-emulating-commands.patch b/slirp-use-correct-size-while-emulating-commands.patch index 76507a4d82f3c769540cf34294d855242ac0737f..25f64e2738e2ae3cbff541719b726dff963007d2 100644 --- a/slirp-use-correct-size-while-emulating-commands.patch +++ b/slirp-use-correct-size-while-emulating-commands.patch @@ -1,6 +1,6 @@ -From 66e2f47a01ffcaafe11acae0a191efd1805f86c6 Mon Sep 17 00:00:00 2001 +From 662aa4f1d168b32335a4dc40782e816329afcac0 Mon Sep 17 00:00:00 2001 From: Prasad J Pandit -Date: Wed, 11 Mar 2020 18:27:22 +0800 +Date: Tue, 14 Apr 2020 18:36:12 +0800 Subject: [PATCH] slirp: use correct size while emulating commands While emulating services in tcp_emu(), it uses 'mbuf' size @@ -10,42 +10,40 @@ Signed-off-by: default avatarPrasad J Pandit Signed-off-by: Samuel Thibault's avatarSamuel Thibault Message-Id: <20200109094228.79764-3-ppandit@redhat.com> --- - slirp/src/tcp_subr.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) + slirp/src/tcp_subr.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index 2053b11b..e898fd03 100644 +index 2a15b16a..019b637a 100644 --- a/slirp/src/tcp_subr.c +++ b/slirp/src/tcp_subr.c -@@ -707,7 +707,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "ORT 
%d,%d,%d,%d,%d,%d\r\n%s", - n1, n2, n3, n4, n5, n6, x==7?buff:""); - return 1; -@@ -740,7 +740,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, m->m_size - m->m_len, -+ m->m_len += snprintf(bptr, M_FREEROOM(m), - "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", - n1, n2, n3, n4, n5, n6, x==7?buff:""); - -@@ -766,8 +766,8 @@ tcp_emu(struct socket *so, struct mbuf *m) - if (m->m_data[m->m_len-1] == '\0' && lport != 0 && - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) -- m->m_len = snprintf(m->m_data, m->m_size, "%d", -- ntohs(so->so_fport)) + 1; -+ m->m_len = snprintf(m->m_data, M_ROOM(m), -+ "%d", ntohs(so->so_fport)) + 1; - return 1; - - case EMU_IRC: --- -2.21.1 (Apple Git-122.3) +@@ -696,7 +696,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, m->m_size - m->m_len, ++ m->m_len += snprintf(bptr, M_FREEROOM(m), + "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, + n5, n6, x == 7 ? buff : ""); + return 1; +@@ -732,7 +732,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += +- snprintf(bptr, m->m_size - m->m_len, ++ snprintf(bptr, M_FREEROOM(m), + "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", + n1, n2, n3, n4, n5, n6, x == 7 ? 
buff : ""); + +@@ -759,7 +759,8 @@ int tcp_emu(struct socket *so, struct mbuf *m) + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) + m->m_len = +- snprintf(m->m_data, m->m_size, "%d", ntohs(so->so_fport)) + 1; ++ snprintf(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)) + 1; + return 1; + + case EMU_IRC: +-- +2.23.0 diff --git a/sm501-Clean-up-local-variables-in-sm501_2d_operation.patch b/sm501-Clean-up-local-variables-in-sm501_2d_operation.patch new file mode 100644 index 0000000000000000000000000000000000000000..66e54cdd42053d31cfa05ffa9ee15fca183254ea --- /dev/null +++ b/sm501-Clean-up-local-variables-in-sm501_2d_operation.patch @@ -0,0 +1,95 @@ +From 6186d3de416825e3a737dd3da31da475f50d66d0 Mon Sep 17 00:00:00 2001 +From: BALATON Zoltan +Date: Thu, 21 May 2020 21:39:44 +0200 +Subject: [PATCH] sm501: Clean up local variables in sm501_2d_operation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Make variables local to the block they are used in to make it clearer +which operation they are needed for. 
+ +Signed-off-by: BALATON Zoltan +Reviewed-by: Philippe Mathieu-Daudé +Message-id: ae59f8138afe7f6a5a4a82539d0f61496a906b06.1590089984.git.balaton@eik.bme.hu +Signed-off-by: Gerd Hoffmann +--- + hw/display/sm501.c | 31 ++++++++++++++++--------------- + 1 file changed, 16 insertions(+), 15 deletions(-) + +diff --git a/hw/display/sm501.c b/hw/display/sm501.c +index f3d11d0b23..98b3b97f7b 100644 +--- a/hw/display/sm501.c ++++ b/hw/display/sm501.c +@@ -699,28 +699,19 @@ static inline void hwc_invalidate(SM501State *s, int crt) + + static void sm501_2d_operation(SM501State *s) + { +- /* obtain operation parameters */ + int cmd = (s->twoD_control >> 16) & 0x1F; + int rtl = s->twoD_control & BIT(27); +- int src_x = (s->twoD_source >> 16) & 0x01FFF; +- int src_y = s->twoD_source & 0xFFFF; +- int dst_x = (s->twoD_destination >> 16) & 0x01FFF; +- int dst_y = s->twoD_destination & 0xFFFF; +- int width = (s->twoD_dimension >> 16) & 0x1FFF; +- int height = s->twoD_dimension & 0xFFFF; +- uint32_t color = s->twoD_foreground; + int format = (s->twoD_stretch >> 20) & 0x3; + int rop_mode = (s->twoD_control >> 15) & 0x1; /* 1 for rop2, else rop3 */ + /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ + int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; + int rop = s->twoD_control & 0xFF; +- uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; ++ int dst_x = (s->twoD_destination >> 16) & 0x01FFF; ++ int dst_y = s->twoD_destination & 0xFFFF; ++ int width = (s->twoD_dimension >> 16) & 0x1FFF; ++ int height = s->twoD_dimension & 0xFFFF; + uint32_t dst_base = s->twoD_destination_base & 0x03FFFFFF; +- +- /* get frame buffer info */ +- uint8_t *src = s->local_mem + src_base; + uint8_t *dst = s->local_mem + dst_base; +- int src_pitch = s->twoD_pitch & 0x1FFF; + int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; + int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 
1 : 0; + int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); +@@ -758,6 +749,13 @@ static void sm501_2d_operation(SM501State *s) + + switch (cmd) { + case 0x00: /* copy area */ ++ { ++ int src_x = (s->twoD_source >> 16) & 0x01FFF; ++ int src_y = s->twoD_source & 0xFFFF; ++ uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; ++ uint8_t *src = s->local_mem + src_base; ++ int src_pitch = s->twoD_pitch & 0x1FFF; ++ + #define COPY_AREA(_bpp, _pixel_type, rtl) { \ + int y, x, index_d, index_s; \ + for (y = 0; y < height; y++) { \ +@@ -793,8 +791,11 @@ static void sm501_2d_operation(SM501State *s) + break; + } + break; +- ++ } + case 0x01: /* fill rectangle */ ++ { ++ uint32_t color = s->twoD_foreground; ++ + #define FILL_RECT(_bpp, _pixel_type) { \ + int y, x; \ + for (y = 0; y < height; y++) { \ +@@ -819,7 +820,7 @@ static void sm501_2d_operation(SM501State *s) + break; + } + break; +- ++ } + default: + qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", + cmd); +-- +2.23.0 + diff --git a/sm501-Convert-printf-abort-to-qemu_log_mask.patch b/sm501-Convert-printf-abort-to-qemu_log_mask.patch new file mode 100644 index 0000000000000000000000000000000000000000..14a530bd78510f91daf5f78e9b9103f1b729f9cf --- /dev/null +++ b/sm501-Convert-printf-abort-to-qemu_log_mask.patch @@ -0,0 +1,159 @@ +From 428e3a78ddf1de3dfb914043d6a8668f73ef8bb3 Mon Sep 17 00:00:00 2001 +From: BALATON Zoltan +Date: Thu, 21 May 2020 21:39:44 +0200 +Subject: [PATCH] sm501: Convert printf + abort to qemu_log_mask +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Some places already use qemu_log_mask() to log unimplemented features +or errors but some others have printf() then abort(). Convert these to +qemu_log_mask() and avoid aborting to prevent guests to easily cause +denial of service. 
+ +Signed-off-by: BALATON Zoltan +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 305af87f59d81e92f2aaff09eb8a3603b8baa322.1590089984.git.balaton@eik.bme.hu +Signed-off-by: Gerd Hoffmann +--- + hw/display/sm501.c | 57 ++++++++++++++++++++++------------------------ + 1 file changed, 27 insertions(+), 30 deletions(-) + +diff --git a/hw/display/sm501.c b/hw/display/sm501.c +index 5918f59b2b..aa4b202a48 100644 +--- a/hw/display/sm501.c ++++ b/hw/display/sm501.c +@@ -727,8 +727,8 @@ static void sm501_2d_operation(SM501State *s) + int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); + + if (addressing != 0x0) { +- printf("%s: only XY addressing is supported.\n", __func__); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); ++ return; + } + + if (rop_mode == 0) { +@@ -754,8 +754,8 @@ static void sm501_2d_operation(SM501State *s) + + if ((s->twoD_source_base & 0x08000000) || + (s->twoD_destination_base & 0x08000000)) { +- printf("%s: only local memory is supported.\n", __func__); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); ++ return; + } + + switch (operation) { +@@ -823,9 +823,9 @@ static void sm501_2d_operation(SM501State *s) + break; + + default: +- printf("non-implemented SM501 2D operation. %d\n", operation); +- abort(); +- break; ++ qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", ++ operation); ++ return; + } + + if (dst_base >= get_fb_addr(s, crt) && +@@ -892,9 +892,8 @@ static uint64_t sm501_system_config_read(void *opaque, hwaddr addr, + break; + + default: +- printf("sm501 system config : not implemented register read." +- " addr=%x\n", (int)addr); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config" ++ "register read. 
addr=%" HWADDR_PRIx "\n", addr); + } + + return ret; +@@ -948,15 +947,15 @@ static void sm501_system_config_write(void *opaque, hwaddr addr, + break; + case SM501_ENDIAN_CONTROL: + if (value & 0x00000001) { +- printf("sm501 system config : big endian mode not implemented.\n"); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: system config big endian mode not" ++ " implemented.\n"); + } + break; + + default: +- printf("sm501 system config : not implemented register write." +- " addr=%x, val=%x\n", (int)addr, (uint32_t)value); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: not implemented system config" ++ "register write. addr=%" HWADDR_PRIx ++ ", val=%" PRIx64 "\n", addr, value); + } + } + +@@ -1207,9 +1206,8 @@ static uint64_t sm501_disp_ctrl_read(void *opaque, hwaddr addr, + break; + + default: +- printf("sm501 disp ctrl : not implemented register read." +- " addr=%x\n", (int)addr); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " ++ "read. addr=%" HWADDR_PRIx "\n", addr); + } + + return ret; +@@ -1345,9 +1343,9 @@ static void sm501_disp_ctrl_write(void *opaque, hwaddr addr, + break; + + default: +- printf("sm501 disp ctrl : not implemented register write." +- " addr=%x, val=%x\n", (int)addr, (unsigned)value); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " ++ "write. addr=%" HWADDR_PRIx ++ ", val=%" PRIx64 "\n", addr, value); + } + } + +@@ -1433,9 +1431,8 @@ static uint64_t sm501_2d_engine_read(void *opaque, hwaddr addr, + ret = 0; /* Should return interrupt status */ + break; + default: +- printf("sm501 disp ctrl : not implemented register read." +- " addr=%x\n", (int)addr); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: not implemented disp ctrl register " ++ "read. 
addr=%" HWADDR_PRIx "\n", addr); + } + + return ret; +@@ -1520,9 +1517,9 @@ static void sm501_2d_engine_write(void *opaque, hwaddr addr, + /* ignored, writing 0 should clear interrupt status */ + break; + default: +- printf("sm501 2d engine : not implemented register write." +- " addr=%x, val=%x\n", (int)addr, (unsigned)value); +- abort(); ++ qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2d engine register " ++ "write. addr=%" HWADDR_PRIx ++ ", val=%" PRIx64 "\n", addr, value); + } + } + +@@ -1670,9 +1667,9 @@ static void sm501_update_display(void *opaque) + draw_line = draw_line32_funcs[dst_depth_index]; + break; + default: +- printf("sm501 update display : invalid control register value.\n"); +- abort(); +- break; ++ qemu_log_mask(LOG_GUEST_ERROR, "sm501: update display" ++ "invalid control register value.\n"); ++ return; + } + + /* set up to draw hardware cursor */ +-- +2.23.0 + diff --git a/sm501-Replace-hand-written-implementation-with-pixma.patch b/sm501-Replace-hand-written-implementation-with-pixma.patch new file mode 100644 index 0000000000000000000000000000000000000000..42fa23aa934b29c19921f869530b93f1f24e1def --- /dev/null +++ b/sm501-Replace-hand-written-implementation-with-pixma.patch @@ -0,0 +1,261 @@ +From bbbf2c2f4201eb84a5bcd07a92399fe166d682e9 Mon Sep 17 00:00:00 2001 +From: BALATON Zoltan +Date: Thu, 21 May 2020 21:39:44 +0200 +Subject: [PATCH] sm501: Replace hand written implementation with pixman where + possible + +Besides being faster this should also prevent malicious guests to +abuse 2D engine to overwrite data or cause a crash. 
+ +Signed-off-by: BALATON Zoltan +Message-id: 58666389b6cae256e4e972a32c05cf8aa51bffc0.1590089984.git.balaton@eik.bme.hu +Signed-off-by: Gerd Hoffmann +--- + hw/display/sm501.c | 207 ++++++++++++++++++++++++++------------------- + 1 file changed, 119 insertions(+), 88 deletions(-) + +diff --git a/hw/display/sm501.c b/hw/display/sm501.c +index 98b3b97f7b..7dc4bb18b7 100644 +--- a/hw/display/sm501.c ++++ b/hw/display/sm501.c +@@ -706,13 +706,12 @@ static void sm501_2d_operation(SM501State *s) + /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ + int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; + int rop = s->twoD_control & 0xFF; +- int dst_x = (s->twoD_destination >> 16) & 0x01FFF; +- int dst_y = s->twoD_destination & 0xFFFF; +- int width = (s->twoD_dimension >> 16) & 0x1FFF; +- int height = s->twoD_dimension & 0xFFFF; ++ unsigned int dst_x = (s->twoD_destination >> 16) & 0x01FFF; ++ unsigned int dst_y = s->twoD_destination & 0xFFFF; ++ unsigned int width = (s->twoD_dimension >> 16) & 0x1FFF; ++ unsigned int height = s->twoD_dimension & 0xFFFF; + uint32_t dst_base = s->twoD_destination_base & 0x03FFFFFF; +- uint8_t *dst = s->local_mem + dst_base; +- int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; ++ unsigned int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; + int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 
1 : 0; + int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); + +@@ -721,104 +720,136 @@ static void sm501_2d_operation(SM501State *s) + return; + } + +- if (rop_mode == 0) { +- if (rop != 0xcc) { +- /* Anything other than plain copies are not supported */ +- qemu_log_mask(LOG_UNIMP, "sm501: rop3 mode with rop %x is not " +- "supported.\n", rop); +- } +- } else { +- if (rop2_source_is_pattern && rop != 0x5) { +- /* For pattern source, we support only inverse dest */ +- qemu_log_mask(LOG_UNIMP, "sm501: rop2 source being the pattern and " +- "rop %x is not supported.\n", rop); +- } else { +- if (rop != 0x5 && rop != 0xc) { +- /* Anything other than plain copies or inverse dest is not +- * supported */ +- qemu_log_mask(LOG_UNIMP, "sm501: rop mode %x is not " +- "supported.\n", rop); +- } +- } +- } +- + if (s->twoD_source_base & BIT(27) || s->twoD_destination_base & BIT(27)) { + qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); + return; + } + ++ if (!dst_pitch) { ++ qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero dest pitch.\n"); ++ return; ++ } ++ ++ if (!width || !height) { ++ qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero size 2D op.\n"); ++ return; ++ } ++ ++ if (rtl) { ++ dst_x -= width - 1; ++ dst_y -= height - 1; ++ } ++ ++ if (dst_base >= get_local_mem_size(s) || dst_base + ++ (dst_x + width + (dst_y + height) * (dst_pitch + width)) * ++ (1 << format) >= get_local_mem_size(s)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "sm501: 2D op dest is outside vram.\n"); ++ return; ++ } ++ + switch (cmd) { +- case 0x00: /* copy area */ ++ case 0: /* BitBlt */ + { +- int src_x = (s->twoD_source >> 16) & 0x01FFF; +- int src_y = s->twoD_source & 0xFFFF; ++ unsigned int src_x = (s->twoD_source >> 16) & 0x01FFF; ++ unsigned int src_y = s->twoD_source & 0xFFFF; + uint32_t src_base = s->twoD_source_base & 0x03FFFFFF; +- uint8_t *src = s->local_mem + src_base; +- int src_pitch = s->twoD_pitch & 0x1FFF; +- +-#define COPY_AREA(_bpp, _pixel_type, rtl) { \ +- 
int y, x, index_d, index_s; \ +- for (y = 0; y < height; y++) { \ +- for (x = 0; x < width; x++) { \ +- _pixel_type val; \ +- \ +- if (rtl) { \ +- index_s = ((src_y - y) * src_pitch + src_x - x) * _bpp; \ +- index_d = ((dst_y - y) * dst_pitch + dst_x - x) * _bpp; \ +- } else { \ +- index_s = ((src_y + y) * src_pitch + src_x + x) * _bpp; \ +- index_d = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ +- } \ +- if (rop_mode == 1 && rop == 5) { \ +- /* Invert dest */ \ +- val = ~*(_pixel_type *)&dst[index_d]; \ +- } else { \ +- val = *(_pixel_type *)&src[index_s]; \ +- } \ +- *(_pixel_type *)&dst[index_d] = val; \ +- } \ +- } \ +- } +- switch (format) { +- case 0: +- COPY_AREA(1, uint8_t, rtl); +- break; +- case 1: +- COPY_AREA(2, uint16_t, rtl); +- break; +- case 2: +- COPY_AREA(4, uint32_t, rtl); +- break; ++ unsigned int src_pitch = s->twoD_pitch & 0x1FFF; ++ ++ if (!src_pitch) { ++ qemu_log_mask(LOG_GUEST_ERROR, "sm501: Zero src pitch.\n"); ++ return; ++ } ++ ++ if (rtl) { ++ src_x -= width - 1; ++ src_y -= height - 1; ++ } ++ ++ if (src_base >= get_local_mem_size(s) || src_base + ++ (src_x + width + (src_y + height) * (src_pitch + width)) * ++ (1 << format) >= get_local_mem_size(s)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "sm501: 2D op src is outside vram.\n"); ++ return; ++ } ++ ++ if ((rop_mode && rop == 0x5) || (!rop_mode && rop == 0x55)) { ++ /* Invert dest, is there a way to do this with pixman? 
*/ ++ unsigned int x, y, i; ++ uint8_t *d = s->local_mem + dst_base; ++ ++ for (y = 0; y < height; y++) { ++ i = (dst_x + (dst_y + y) * dst_pitch) * (1 << format); ++ for (x = 0; x < width; x++, i += (1 << format)) { ++ switch (format) { ++ case 0: ++ d[i] = ~d[i]; ++ break; ++ case 1: ++ *(uint16_t *)&d[i] = ~*(uint16_t *)&d[i]; ++ break; ++ case 2: ++ *(uint32_t *)&d[i] = ~*(uint32_t *)&d[i]; ++ break; ++ } ++ } ++ } ++ } else { ++ /* Do copy src for unimplemented ops, better than unpainted area */ ++ if ((rop_mode && (rop != 0xc || rop2_source_is_pattern)) || ++ (!rop_mode && rop != 0xcc)) { ++ qemu_log_mask(LOG_UNIMP, ++ "sm501: rop%d op %x%s not implemented\n", ++ (rop_mode ? 2 : 3), rop, ++ (rop2_source_is_pattern ? ++ " with pattern source" : "")); ++ } ++ /* Check for overlaps, this could be made more exact */ ++ uint32_t sb, se, db, de; ++ sb = src_base + src_x + src_y * (width + src_pitch); ++ se = sb + width + height * (width + src_pitch); ++ db = dst_base + dst_x + dst_y * (width + dst_pitch); ++ de = db + width + height * (width + dst_pitch); ++ if (rtl && ((db >= sb && db <= se) || (de >= sb && de <= se))) { ++ /* regions may overlap: copy via temporary */ ++ int llb = width * (1 << format); ++ int tmp_stride = DIV_ROUND_UP(llb, sizeof(uint32_t)); ++ uint32_t *tmp = g_malloc(tmp_stride * sizeof(uint32_t) * ++ height); ++ pixman_blt((uint32_t *)&s->local_mem[src_base], tmp, ++ src_pitch * (1 << format) / sizeof(uint32_t), ++ tmp_stride, 8 * (1 << format), 8 * (1 << format), ++ src_x, src_y, 0, 0, width, height); ++ pixman_blt(tmp, (uint32_t *)&s->local_mem[dst_base], ++ tmp_stride, ++ dst_pitch * (1 << format) / sizeof(uint32_t), ++ 8 * (1 << format), 8 * (1 << format), ++ 0, 0, dst_x, dst_y, width, height); ++ g_free(tmp); ++ } else { ++ pixman_blt((uint32_t *)&s->local_mem[src_base], ++ (uint32_t *)&s->local_mem[dst_base], ++ src_pitch * (1 << format) / sizeof(uint32_t), ++ dst_pitch * (1 << format) / sizeof(uint32_t), ++ 8 * (1 << format), 8 * (1 << 
format), ++ src_x, src_y, dst_x, dst_y, width, height); ++ } + } + break; + } +- case 0x01: /* fill rectangle */ ++ case 1: /* Rectangle Fill */ + { + uint32_t color = s->twoD_foreground; + +-#define FILL_RECT(_bpp, _pixel_type) { \ +- int y, x; \ +- for (y = 0; y < height; y++) { \ +- for (x = 0; x < width; x++) { \ +- int index = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ +- *(_pixel_type *)&dst[index] = (_pixel_type)color; \ +- } \ +- } \ +- } +- +- switch (format) { +- case 0: +- FILL_RECT(1, uint8_t); +- break; +- case 1: +- color = cpu_to_le16(color); +- FILL_RECT(2, uint16_t); +- break; +- case 2: ++ if (format == 2) { + color = cpu_to_le32(color); +- FILL_RECT(4, uint32_t); +- break; ++ } else if (format == 1) { ++ color = cpu_to_le16(color); + } ++ ++ pixman_fill((uint32_t *)&s->local_mem[dst_base], ++ dst_pitch * (1 << format) / sizeof(uint32_t), ++ 8 * (1 << format), dst_x, dst_y, width, height, color); + break; + } + default: +-- +2.23.0 + diff --git a/sm501-Shorten-long-variable-names-in-sm501_2d_operat.patch b/sm501-Shorten-long-variable-names-in-sm501_2d_operat.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2eefcba1e31e338ad6d934bf9e112a98d013b04 --- /dev/null +++ b/sm501-Shorten-long-variable-names-in-sm501_2d_operat.patch @@ -0,0 +1,134 @@ +From bc472e56b985db1de73a7ddab5ea8568d6e7f327 Mon Sep 17 00:00:00 2001 +From: BALATON Zoltan +Date: Thu, 21 May 2020 21:39:44 +0200 +Subject: [PATCH] sm501: Shorten long variable names in sm501_2d_operation + +This increases readability and cleans up some confusing naming. 
+ +Signed-off-by: BALATON Zoltan +Message-id: b9b67b94c46e945252a73c77dfd117132c63c4fb.1590089984.git.balaton@eik.bme.hu +Signed-off-by: Gerd Hoffmann +--- + hw/display/sm501.c | 45 ++++++++++++++++++++++----------------------- + 1 file changed, 22 insertions(+), 23 deletions(-) + +diff --git a/hw/display/sm501.c b/hw/display/sm501.c +index aa4b202a48..51e7ccc39d 100644 +--- a/hw/display/sm501.c ++++ b/hw/display/sm501.c +@@ -700,17 +700,16 @@ static inline void hwc_invalidate(SM501State *s, int crt) + static void sm501_2d_operation(SM501State *s) + { + /* obtain operation parameters */ +- int operation = (s->twoD_control >> 16) & 0x1f; ++ int cmd = (s->twoD_control >> 16) & 0x1F; + int rtl = s->twoD_control & 0x8000000; + int src_x = (s->twoD_source >> 16) & 0x01FFF; + int src_y = s->twoD_source & 0xFFFF; + int dst_x = (s->twoD_destination >> 16) & 0x01FFF; + int dst_y = s->twoD_destination & 0xFFFF; +- int operation_width = (s->twoD_dimension >> 16) & 0x1FFF; +- int operation_height = s->twoD_dimension & 0xFFFF; ++ int width = (s->twoD_dimension >> 16) & 0x1FFF; ++ int height = s->twoD_dimension & 0xFFFF; + uint32_t color = s->twoD_foreground; +- int format_flags = (s->twoD_stretch >> 20) & 0x3; +- int addressing = (s->twoD_stretch >> 16) & 0xF; ++ int format = (s->twoD_stretch >> 20) & 0x3; + int rop_mode = (s->twoD_control >> 15) & 0x1; /* 1 for rop2, else rop3 */ + /* 1 if rop2 source is the pattern, otherwise the source is the bitmap */ + int rop2_source_is_pattern = (s->twoD_control >> 14) & 0x1; +@@ -721,12 +720,12 @@ static void sm501_2d_operation(SM501State *s) + /* get frame buffer info */ + uint8_t *src = s->local_mem + src_base; + uint8_t *dst = s->local_mem + dst_base; +- int src_width = s->twoD_pitch & 0x1FFF; +- int dst_width = (s->twoD_pitch >> 16) & 0x1FFF; ++ int src_pitch = s->twoD_pitch & 0x1FFF; ++ int dst_pitch = (s->twoD_pitch >> 16) & 0x1FFF; + int crt = (s->dc_crt_control & SM501_DC_CRT_CONTROL_SEL) ? 
1 : 0; + int fb_len = get_width(s, crt) * get_height(s, crt) * get_bpp(s, crt); + +- if (addressing != 0x0) { ++ if ((s->twoD_stretch >> 16) & 0xF) { + qemu_log_mask(LOG_UNIMP, "sm501: only XY addressing is supported.\n"); + return; + } +@@ -758,20 +757,20 @@ static void sm501_2d_operation(SM501State *s) + return; + } + +- switch (operation) { ++ switch (cmd) { + case 0x00: /* copy area */ + #define COPY_AREA(_bpp, _pixel_type, rtl) { \ + int y, x, index_d, index_s; \ +- for (y = 0; y < operation_height; y++) { \ +- for (x = 0; x < operation_width; x++) { \ ++ for (y = 0; y < height; y++) { \ ++ for (x = 0; x < width; x++) { \ + _pixel_type val; \ + \ + if (rtl) { \ +- index_s = ((src_y - y) * src_width + src_x - x) * _bpp; \ +- index_d = ((dst_y - y) * dst_width + dst_x - x) * _bpp; \ ++ index_s = ((src_y - y) * src_pitch + src_x - x) * _bpp; \ ++ index_d = ((dst_y - y) * dst_pitch + dst_x - x) * _bpp; \ + } else { \ +- index_s = ((src_y + y) * src_width + src_x + x) * _bpp; \ +- index_d = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ ++ index_s = ((src_y + y) * src_pitch + src_x + x) * _bpp; \ ++ index_d = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ + } \ + if (rop_mode == 1 && rop == 5) { \ + /* Invert dest */ \ +@@ -783,7 +782,7 @@ static void sm501_2d_operation(SM501State *s) + } \ + } \ + } +- switch (format_flags) { ++ switch (format) { + case 0: + COPY_AREA(1, uint8_t, rtl); + break; +@@ -799,15 +798,15 @@ static void sm501_2d_operation(SM501State *s) + case 0x01: /* fill rectangle */ + #define FILL_RECT(_bpp, _pixel_type) { \ + int y, x; \ +- for (y = 0; y < operation_height; y++) { \ +- for (x = 0; x < operation_width; x++) { \ +- int index = ((dst_y + y) * dst_width + dst_x + x) * _bpp; \ ++ for (y = 0; y < height; y++) { \ ++ for (x = 0; x < width; x++) { \ ++ int index = ((dst_y + y) * dst_pitch + dst_x + x) * _bpp; \ + *(_pixel_type *)&dst[index] = (_pixel_type)color; \ + } \ + } \ + } + +- switch (format_flags) { ++ switch (format) { + case 0: 
+ FILL_RECT(1, uint8_t); + break; +@@ -824,14 +823,14 @@ static void sm501_2d_operation(SM501State *s) + + default: + qemu_log_mask(LOG_UNIMP, "sm501: not implemented 2D operation: %d\n", +- operation); ++ cmd); + return; + } + + if (dst_base >= get_fb_addr(s, crt) && + dst_base <= get_fb_addr(s, crt) + fb_len) { +- int dst_len = MIN(fb_len, ((dst_y + operation_height - 1) * dst_width + +- dst_x + operation_width) * (1 << format_flags)); ++ int dst_len = MIN(fb_len, ((dst_y + height - 1) * dst_pitch + ++ dst_x + width) * (1 << format)); + if (dst_len) { + memory_region_set_dirty(&s->local_mem_region, dst_base, dst_len); + } +-- +2.23.0 + diff --git a/sm501-Use-BIT-x-macro-to-shorten-constant.patch b/sm501-Use-BIT-x-macro-to-shorten-constant.patch new file mode 100644 index 0000000000000000000000000000000000000000..697d0ee61d5cba82611b05f037a25397de5073b6 --- /dev/null +++ b/sm501-Use-BIT-x-macro-to-shorten-constant.patch @@ -0,0 +1,42 @@ +From 9f1e9012047639121eb275a4f8f5693d340e91f6 Mon Sep 17 00:00:00 2001 +From: BALATON Zoltan +Date: Thu, 21 May 2020 21:39:44 +0200 +Subject: [PATCH] sm501: Use BIT(x) macro to shorten constant +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: BALATON Zoltan +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 124bf5de8d7cf503b32b377d0445029a76bfbd49.1590089984.git.balaton@eik.bme.hu +Signed-off-by: Gerd Hoffmann +--- + hw/display/sm501.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/hw/display/sm501.c b/hw/display/sm501.c +index 51e7ccc39d..f3d11d0b23 100644 +--- a/hw/display/sm501.c ++++ b/hw/display/sm501.c +@@ -701,7 +701,7 @@ static void sm501_2d_operation(SM501State *s) + { + /* obtain operation parameters */ + int cmd = (s->twoD_control >> 16) & 0x1F; +- int rtl = s->twoD_control & 0x8000000; ++ int rtl = s->twoD_control & BIT(27); + int src_x = (s->twoD_source >> 16) & 0x01FFF; + int src_y = s->twoD_source & 0xFFFF; + int dst_x = 
(s->twoD_destination >> 16) & 0x01FFF; +@@ -751,8 +751,7 @@ static void sm501_2d_operation(SM501State *s) + } + } + +- if ((s->twoD_source_base & 0x08000000) || +- (s->twoD_destination_base & 0x08000000)) { ++ if (s->twoD_source_base & BIT(27) || s->twoD_destination_base & BIT(27)) { + qemu_log_mask(LOG_UNIMP, "sm501: only local memory is supported.\n"); + return; + } +-- +2.23.0 + diff --git a/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch b/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch index c45af3e6aa3ba4b5c58583f5605c7a6b9f90e508..cfd1842d3f199ec1db6e324e448df37b4553ea2a 100644 --- a/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch +++ b/smbios-Add-missing-member-of-type-4-for-smbios-3.0.patch @@ -1,21 +1,19 @@ -From e52fdbd850b49304c5bbd5f19c9f518b80efef42 Mon Sep 17 00:00:00 2001 -From: zhanghailiang -Date: Wed, 31 Jul 2019 15:40:55 +0800 +From 2b8ad77678da175cb92c902955cb85827e661de3 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Tue, 14 Apr 2020 14:53:44 +0800 Subject: [PATCH] smbios: Add missing member of type 4 for smbios 3.0 According to smbios 3.0 spec, for processor information (type 4), -it adds three new members (Core Count 2, Core enabled 2, thread count 2) for 3.0, - -Without this three members, we can not get correct cpu frequency from dmi, +it adds three new members (Core Count 2, Core enabled 2, thread count 2) for 3.0, Without this three members, we can not get correct cpu frequency from dmi, Because it will failed to check the length of Processor Infomation in DMI. The corresponding codes in kernel is like: - if (dm->type == DMI_ENTRY_PROCESSOR && - dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { - u16 val = (u16)get_unaligned((const u16 *) - (dmi_data + DMI_PROCESSOR_MAX_SPEED)); - *mhz = val > *mhz ? val : *mhz; - } + if (dm->type == DMI_ENTRY_PROCESSOR && + dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) { + u16 val = (u16)get_unaligned((const u16 *) + (dmi_data + DMI_PROCESSOR_MAX_SPEED)); + *mhz = val > *mhz ? 
val : *mhz; + } Signed-off-by: zhanghailiang --- @@ -24,11 +22,11 @@ Signed-off-by: zhanghailiang 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index 47be9071..b11ec6e3 100644 +index 7bcd67b0..51b00d44 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c -@@ -600,7 +600,9 @@ static void smbios_build_type_4_table(unsigned instance) - t->thread_count = smp_threads; +@@ -603,7 +603,9 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) + t->thread_count = ms->smp.threads; t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ t->processor_family2 = cpu_to_le16(0x01); /* Other */ - @@ -39,7 +37,7 @@ index 47be9071..b11ec6e3 100644 smbios_type4_count++; } diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h -index 6fef32a3..70eb7304 100644 +index 02a0ced0..6887bca4 100644 --- a/include/hw/firmware/smbios.h +++ b/include/hw/firmware/smbios.h @@ -193,6 +193,9 @@ struct smbios_type_4 { @@ -50,8 +48,7 @@ index 6fef32a3..70eb7304 100644 + uint16_t enabledcorecount2; + uint16_t threadcount2; } QEMU_PACKED; - - /* SMBIOS type 11 - OEM strings */ --- -2.19.1 + /* SMBIOS type 11 - OEM strings */ +-- +2.23.0 diff --git a/spapr-Implement-get_dt_compatible-callback.patch b/spapr-Implement-get_dt_compatible-callback.patch new file mode 100644 index 0000000000000000000000000000000000000000..e64a8746f498a68085824f6cace1bb2e958ce7c7 --- /dev/null +++ b/spapr-Implement-get_dt_compatible-callback.patch @@ -0,0 +1,68 @@ +From c520d8e823431be94268daa2a911e224cab81521 Mon Sep 17 00:00:00 2001 +From: Stefan Berger +Date: Tue, 21 Jan 2020 10:29:31 -0500 +Subject: [PATCH 04/19] spapr: Implement get_dt_compatible() callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For devices that cannot be statically initialized, implement a +get_dt_compatible() callback that allows us to ask the device for +the 'compatible' value. 
+ +Signed-off-by: Stefan Berger +Reviewed-by: Marc-André Lureau +Reviewed-by: David Gibson +Message-Id: <20200121152935.649898-3-stefanb@linux.ibm.com> +Signed-off-by: David Gibson +Signed-off-by: jiangfangjie +--- + hw/ppc/spapr_vio.c | 11 +++++++++-- + include/hw/ppc/spapr_vio.h | 1 + + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c +index 583c13de..4e50916f 100644 +--- a/hw/ppc/spapr_vio.c ++++ b/hw/ppc/spapr_vio.c +@@ -89,6 +89,7 @@ static int vio_make_devnode(SpaprVioDevice *dev, + SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + int vdevice_off, node_off, ret; + char *dt_name; ++ const char *dt_compatible; + + vdevice_off = fdt_path_offset(fdt, "/vdevice"); + if (vdevice_off < 0) { +@@ -115,9 +116,15 @@ static int vio_make_devnode(SpaprVioDevice *dev, + } + } + +- if (pc->dt_compatible) { ++ if (pc->get_dt_compatible) { ++ dt_compatible = pc->get_dt_compatible(dev); ++ } else { ++ dt_compatible = pc->dt_compatible; ++ } ++ ++ if (dt_compatible) { + ret = fdt_setprop_string(fdt, node_off, "compatible", +- pc->dt_compatible); ++ dt_compatible); + if (ret < 0) { + return ret; + } +diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h +index 04609f21..97951fc6 100644 +--- a/include/hw/ppc/spapr_vio.h ++++ b/include/hw/ppc/spapr_vio.h +@@ -56,6 +56,7 @@ typedef struct SpaprVioDeviceClass { + void (*realize)(SpaprVioDevice *dev, Error **errp); + void (*reset)(SpaprVioDevice *dev); + int (*devnode)(SpaprVioDevice *dev, void *fdt, int node_off); ++ const char *(*get_dt_compatible)(SpaprVioDevice *dev); + } SpaprVioDeviceClass; + + struct SpaprVioDevice { +-- +2.23.0 + diff --git a/spapr_pci-add-spapr-msi-read-method.patch b/spapr_pci-add-spapr-msi-read-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..2cc4994f09171252daf5a435832480151c458f4b --- /dev/null +++ b/spapr_pci-add-spapr-msi-read-method.patch @@ -0,0 +1,61 @@ +From 
cbbcd56e090a59d0eaa4e35ed0efb24d6dd1003e Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Mar 2021 17:23:24 +0800 +Subject: [PATCH] spapr_pci: add spapr msi read method + +fix CVE-2020-15469 + +Add spapr msi mmio read method to avoid NULL pointer dereference +issue. + +Reported-by: Lei Sun +Acked-by: David Gibson +Reviewed-by: Li Qiang +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/ppc/spapr_pci.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c +index 9003fe9010..1571e049ab 100644 +--- a/hw/ppc/spapr_pci.c ++++ b/hw/ppc/spapr_pci.c +@@ -50,6 +50,7 @@ + #include "sysemu/kvm.h" + #include "sysemu/hostmem.h" + #include "sysemu/numa.h" ++#include "qemu/log.h" + + /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ + #define RTAS_QUERY_FN 0 +@@ -743,6 +744,12 @@ static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) + return route; + } + ++static uint64_t spapr_msi_read(void *opaque, hwaddr addr, unsigned size) ++{ ++ qemu_log_mask(LOG_UNIMP, "%s not implemented\n", __func__); ++ return 0; ++} ++ + /* + * MSI/MSIX memory region implementation. + * The handler handles both MSI and MSIX. 
+@@ -760,8 +767,10 @@ static void spapr_msi_write(void *opaque, hwaddr addr, + } + + static const MemoryRegionOps spapr_msi_ops = { +- /* There is no .read as the read result is undefined by PCI spec */ +- .read = NULL, ++ /* .read result is undefined by PCI spec ++ * define .read method to avoid assert failure in memory_region_init_io ++ */ ++ .read = spapr_msi_read, + .write = spapr_msi_write, + .endianness = DEVICE_LITTLE_ENDIAN + }; +-- +2.27.0 + diff --git a/ssi-Fix-bad-printf-format-specifiers.patch b/ssi-Fix-bad-printf-format-specifiers.patch new file mode 100644 index 0000000000000000000000000000000000000000..811a14da46a4e55ca324209309774973b563b70c --- /dev/null +++ b/ssi-Fix-bad-printf-format-specifiers.patch @@ -0,0 +1,48 @@ +From 073457a45eaccd2beac3c94c53a449b8f683501e Mon Sep 17 00:00:00 2001 +From: AlexChen +Date: Wed, 4 Nov 2020 18:22:45 +0800 +Subject: [PATCH] ssi: Fix bad printf format specifiers + +We should use printf format specifier "%u" instead of "%d" for +argument of type "unsigned int". + +Reported-by: Euler Robot +Signed-off-by: Alex Chen +Reviewed-by: Alistair Francis +Message-id: 5FA280F5.8060902@huawei.com +Signed-off-by: Peter Maydell +(cherry-picked from commit 9df0a97298) +--- + hw/ssi/imx_spi.c | 2 +- + hw/ssi/xilinx_spi.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c +index 5cec9b5d05..0b3052bdf9 100644 +--- a/hw/ssi/imx_spi.c ++++ b/hw/ssi/imx_spi.c +@@ -52,7 +52,7 @@ static const char *imx_spi_reg_name(uint32_t reg) + case ECSPI_MSGDATA: + return "ECSPI_MSGDATA"; + default: +- sprintf(unknown, "%d ?", reg); ++ sprintf(unknown, "%u ?", reg); + return unknown; + } + } +diff --git a/hw/ssi/xilinx_spi.c b/hw/ssi/xilinx_spi.c +index 1379cb164b..d2b69d027a 100644 +--- a/hw/ssi/xilinx_spi.c ++++ b/hw/ssi/xilinx_spi.c +@@ -139,7 +139,7 @@ static void xlx_spi_update_irq(XilinxSPI *s) + irq chain unless things really changed. 
*/ + if (pending != s->irqline) { + s->irqline = pending; +- DB_PRINT("irq_change of state %d ISR:%x IER:%X\n", ++ DB_PRINT("irq_change of state %u ISR:%x IER:%X\n", + pending, s->regs[R_IPISR], s->regs[R_IPIER]); + qemu_set_irq(s->irq, pending); + } +-- +2.27.0 + diff --git a/target-arm-Add-CPU-features-to-query-cpu-model-expan.patch b/target-arm-Add-CPU-features-to-query-cpu-model-expan.patch new file mode 100644 index 0000000000000000000000000000000000000000..4047145033d7010acfb3cfb002feb920fb303f0d --- /dev/null +++ b/target-arm-Add-CPU-features-to-query-cpu-model-expan.patch @@ -0,0 +1,89 @@ +From 274d25bdb2df13a26ad6d2a8a06fcc281a22f642 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Thu, 6 Aug 2020 16:14:58 +0800 +Subject: [PATCH 7/9] target/arm: Add CPU features to query-cpu-model-expansion + +Add CPU features to the result of query-cpu-model-expansion so that +other applications (such as libvirt) can know the supported CPU +features. + +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + target/arm/cpu.c | 27 +++++++++++++++++++++++++++ + target/arm/cpu.h | 2 ++ + target/arm/monitor.c | 2 ++ + 3 files changed, 31 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index db46afba..dcf9f49e 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -25,6 +25,8 @@ + #include "qemu/module.h" + #include "qapi/error.h" + #include "qapi/visitor.h" ++#include "qapi/qmp/qdict.h" ++#include "qom/qom-qobject.h" + #include "cpu.h" + #include "internals.h" + #include "exec/exec-all.h" +@@ -1403,6 +1405,31 @@ static const CPUFeatureDep feature_dependencies[] = { + }, + }; + ++void arm_cpu_features_to_dict(ARMCPU *cpu, QDict *features) ++{ ++ Object *obj = OBJECT(cpu); ++ const char *name; ++ ObjectProperty *prop; ++ bool is_32bit = !arm_feature(&cpu->env, ARM_FEATURE_AARCH64); ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(cpu_features); ++i) { ++ if (is_32bit != cpu_features[i].is_32bit) { ++ continue; ++ } ++ ++ name = cpu_features[i].name; ++ 
prop = object_property_find(obj, name, NULL); ++ if (prop) { ++ QObject *value; ++ ++ assert(prop->get); ++ value = object_property_get_qobject(obj, name, &error_abort); ++ qdict_put_obj(features, name, value); ++ } ++ } ++} ++ + static void arm_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 7bb481fb..068c3fa2 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -3692,4 +3692,6 @@ static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id) + #define cpu_isar_feature(name, cpu) \ + ({ ARMCPU *cpu_ = (cpu); isar_feature_##name(&cpu_->isar); }) + ++void arm_cpu_features_to_dict(ARMCPU *cpu, QDict *features); ++ + #endif +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index e2b1d117..7c2ff3c0 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -219,6 +219,8 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + } + } + ++ arm_cpu_features_to_dict(ARM_CPU(obj), qdict_out); ++ + if (!qdict_size(qdict_out)) { + qobject_unref(qdict_out); + } else { +-- +2.25.1 + diff --git a/target-arm-Add-ID_AA64MMFR2_EL1.patch b/target-arm-Add-ID_AA64MMFR2_EL1.patch new file mode 100644 index 0000000000000000000000000000000000000000..eee33ae241bde2333d5308c7ca39297782598ccc --- /dev/null +++ b/target-arm-Add-ID_AA64MMFR2_EL1.patch @@ -0,0 +1,87 @@ +From 3451fb922aa7b0fe532e508ca13d4ab4b3ec75bf Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 8 Feb 2020 12:58:13 +0000 +Subject: [PATCH 02/13] target/arm: Add ID_AA64MMFR2_EL1 + +Add definitions for all of the fields, up to ARMv8.5. +Convert the existing RESERVED register to a full register. +Query KVM for the value of the register for the host. 
+ +Reviewed-by: Peter Maydell +Signed-off-by: Richard Henderson +Message-id: 20200208125816.14954-18-richard.henderson@linaro.org +Signed-off-by: Peter Maydell +--- + target/arm/cpu.h | 17 +++++++++++++++++ + target/arm/helper.c | 4 ++-- + target/arm/kvm64.c | 2 ++ + 3 files changed, 21 insertions(+), 2 deletions(-) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index fe310828..3e65bc50 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -866,6 +866,7 @@ struct ARMCPU { + uint64_t id_aa64pfr1; + uint64_t id_aa64mmfr0; + uint64_t id_aa64mmfr1; ++ uint64_t id_aa64mmfr2; + } isar; + uint32_t midr; + uint32_t revidr; +@@ -1762,6 +1763,22 @@ FIELD(ID_AA64MMFR1, PAN, 20, 4) + FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) + FIELD(ID_AA64MMFR1, XNX, 28, 4) + ++FIELD(ID_AA64MMFR2, CNP, 0, 4) ++FIELD(ID_AA64MMFR2, UAO, 4, 4) ++FIELD(ID_AA64MMFR2, LSM, 8, 4) ++FIELD(ID_AA64MMFR2, IESB, 12, 4) ++FIELD(ID_AA64MMFR2, VARANGE, 16, 4) ++FIELD(ID_AA64MMFR2, CCIDX, 20, 4) ++FIELD(ID_AA64MMFR2, NV, 24, 4) ++FIELD(ID_AA64MMFR2, ST, 28, 4) ++FIELD(ID_AA64MMFR2, AT, 32, 4) ++FIELD(ID_AA64MMFR2, IDS, 36, 4) ++FIELD(ID_AA64MMFR2, FWB, 40, 4) ++FIELD(ID_AA64MMFR2, TTL, 48, 4) ++FIELD(ID_AA64MMFR2, BBM, 52, 4) ++FIELD(ID_AA64MMFR2, EVT, 56, 4) ++FIELD(ID_AA64MMFR2, E0PD, 60, 4) ++ + FIELD(ID_DFR0, COPDBG, 0, 4) + FIELD(ID_DFR0, COPSDBG, 4, 4) + FIELD(ID_DFR0, MMAPDBG, 8, 4) +diff --git a/target/arm/helper.c b/target/arm/helper.c +index b74c23a9..c50b1ba1 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -6182,10 +6182,10 @@ void register_cp_regs_for_features(ARMCPU *cpu) + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, + .resetvalue = cpu->isar.id_aa64mmfr1 }, +- { .name = "ID_AA64MMFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, ++ { .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = 0 }, ++ .resetvalue = 
cpu->isar.id_aa64mmfr2 }, + { .name = "ID_AA64MMFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 4f0bf000..b794108a 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -541,6 +541,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + ARM64_SYS_REG(3, 0, 0, 7, 0)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, + ARM64_SYS_REG(3, 0, 0, 7, 1)); ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, ++ ARM64_SYS_REG(3, 0, 0, 7, 2)); + + /* + * Note that if AArch32 support is not present in the host, +-- +2.25.1 + diff --git a/target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch b/target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch new file mode 100644 index 0000000000000000000000000000000000000000..7516ed8108de271970e600dbd03c964611b3b0ba --- /dev/null +++ b/target-arm-Add-_aa64_-and-_any_-versions-of-pmu_8_1-.patch @@ -0,0 +1,166 @@ +From 515975da851ca9567053bcf0487fde4447dfdc4f Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 14 Feb 2020 17:51:04 +0000 +Subject: [PATCH 06/13] target/arm: Add _aa64_ and _any_ versions of pmu_8_1 + isar checks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add the 64-bit version of the "is this a v8.1 PMUv3?" +ID register check function, and the _any_ version that +checks for either AArch32 or AArch64 support. We'll use +this in a later commit. + +We don't (yet) do any isar_feature checks on ID_AA64DFR1_EL1, +but we move id_aa64dfr1 into the ARMISARegisters struct with +id_aa64dfr0, for consistency. 
+ +Reviewed-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Peter Maydell +Message-id: 20200214175116.9164-10-peter.maydell@linaro.org +--- + target/arm/cpu.c | 3 ++- + target/arm/cpu.h | 15 +++++++++++++-- + target/arm/cpu64.c | 8 ++++---- + target/arm/helper.c | 12 +++++++----- + 4 files changed, 26 insertions(+), 12 deletions(-) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 7e9b85a2..bb2edf4e 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1522,7 +1522,8 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + cpu); + #endif + } else { +- cpu->id_aa64dfr0 = FIELD_DP64(cpu->id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); ++ cpu->isar.id_aa64dfr0 = ++ FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); + cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); + cpu->pmceid0 = 0; + cpu->pmceid1 = 0; +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 2d8d27e8..230130be 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -868,6 +868,8 @@ struct ARMCPU { + uint64_t id_aa64mmfr0; + uint64_t id_aa64mmfr1; + uint64_t id_aa64mmfr2; ++ uint64_t id_aa64dfr0; ++ uint64_t id_aa64dfr1; + } isar; + uint32_t midr; + uint32_t revidr; +@@ -884,8 +886,6 @@ struct ARMCPU { + uint32_t id_mmfr2; + uint32_t id_mmfr3; + uint32_t id_mmfr4; +- uint64_t id_aa64dfr0; +- uint64_t id_aa64dfr1; + uint64_t id_aa64afr0; + uint64_t id_aa64afr1; + uint32_t dbgdidr; +@@ -3657,6 +3657,17 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; + } + ++static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) ++{ ++ return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && ++ FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; ++} ++ ++static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id) ++{ ++ return isar_feature_aa64_pmu_8_1(id) || isar_feature_aa32_pmu_8_1(id); ++} ++ + /* + * Forward to the 
above feature tests given an ARMCPU pointer. + */ +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index afdabbeb..aa96548f 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -137,7 +137,7 @@ static void aarch64_a57_initfn(Object *obj) + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; +- cpu->id_aa64dfr0 = 0x10305106; ++ cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64mmfr0 = 0x00001124; + cpu->dbgdidr = 0x3516d000; +@@ -191,7 +191,7 @@ static void aarch64_a53_initfn(Object *obj) + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_isar6 = 0; + cpu->isar.id_aa64pfr0 = 0x00002222; +- cpu->id_aa64dfr0 = 0x10305106; ++ cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ + cpu->dbgdidr = 0x3516d000; +@@ -244,7 +244,7 @@ static void aarch64_a72_initfn(Object *obj) + cpu->isar.id_isar4 = 0x00011142; + cpu->isar.id_isar5 = 0x00011121; + cpu->isar.id_aa64pfr0 = 0x00002222; +- cpu->id_aa64dfr0 = 0x10305106; ++ cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64mmfr0 = 0x00001124; + cpu->dbgdidr = 0x3516d000; +@@ -276,7 +276,7 @@ static void aarch64_kunpeng_920_initfn(Object *obj) + cpu->midr = 0x480fd010; + cpu->ctr = 0x84448004; + cpu->isar.id_aa64pfr0 = 0x11001111; +- cpu->id_aa64dfr0 = 0x110305408; ++ cpu->isar.id_aa64dfr0 = 0x110305408; + cpu->isar.id_aa64isar0 = 0x10211120; + cpu->isar.id_aa64mmfr0 = 0x101125; + } +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 3f06ca19..a71f4ef6 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -23,6 +23,7 @@ + #include "hw/semihosting/semihost.h" + #include "sysemu/cpus.h" + #include "sysemu/kvm.h" ++#include "sysemu/tcg.h" + #include "qemu/range.h" + #include "qapi/qapi-commands-machine-target.h" + #include "qapi/error.h" +@@ -5611,9 +5612,10 @@ static void 
define_debug_regs(ARMCPU *cpu) + * check that if they both exist then they agree. + */ + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { +- assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, BRPS) == brps); +- assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, WRPS) == wrps); +- assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) == ctx_cmps); ++ assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) == brps); ++ assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) == wrps); ++ assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) ++ == ctx_cmps); + } + + define_one_arm_cp_reg(cpu, &dbgdidr); +@@ -6112,11 +6114,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->id_aa64dfr0 }, ++ .resetvalue = cpu->isar.id_aa64dfr0 }, + { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->id_aa64dfr1 }, ++ .resetvalue = cpu->isar.id_aa64dfr1 }, + { .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +-- +2.25.1 + diff --git a/target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch b/target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch new file mode 100644 index 0000000000000000000000000000000000000000..66e4ec4ad078aacdd4e7cb9a76244e1460487551 --- /dev/null +++ b/target-arm-Add-and-use-FIELD-definitions-for-ID_AA64.patch @@ -0,0 +1,76 @@ +From 4001f3040937094660eab44dbb49b86817317ea9 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 14 Feb 2020 17:51:01 +0000 +Subject: [PATCH 03/13] target/arm: Add and use FIELD definitions for + ID_AA64DFR0_EL1 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit 
+ +Add FIELD() definitions for the ID_AA64DFR0_EL1 and use them +where we currently have hard-coded bit values. + +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Signed-off-by: Peter Maydell +Message-id: 20200214175116.9164-7-peter.maydell@linaro.org +--- + target/arm/cpu.c | 2 +- + target/arm/cpu.h | 10 ++++++++++ + target/arm/helper.c | 6 +++--- + 3 files changed, 14 insertions(+), 4 deletions(-) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 811e5c63..dbd05e01 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1522,7 +1522,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + cpu); + #endif + } else { +- cpu->id_aa64dfr0 &= ~0xf00; ++ cpu->id_aa64dfr0 = FIELD_DP64(cpu->id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); + cpu->id_dfr0 &= ~(0xf << 24); + cpu->pmceid0 = 0; + cpu->pmceid1 = 0; +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 3e65bc50..91cc02b4 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1779,6 +1779,16 @@ FIELD(ID_AA64MMFR2, BBM, 52, 4) + FIELD(ID_AA64MMFR2, EVT, 56, 4) + FIELD(ID_AA64MMFR2, E0PD, 60, 4) + ++FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) ++FIELD(ID_AA64DFR0, TRACEVER, 4, 4) ++FIELD(ID_AA64DFR0, PMUVER, 8, 4) ++FIELD(ID_AA64DFR0, BRPS, 12, 4) ++FIELD(ID_AA64DFR0, WRPS, 20, 4) ++FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) ++FIELD(ID_AA64DFR0, PMSVER, 32, 4) ++FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) ++FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) ++ + FIELD(ID_DFR0, COPDBG, 0, 4) + FIELD(ID_DFR0, COPSDBG, 4, 4) + FIELD(ID_DFR0, MMAPDBG, 8, 4) +diff --git a/target/arm/helper.c b/target/arm/helper.c +index c50b1ba1..419be640 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -5611,9 +5611,9 @@ static void define_debug_regs(ARMCPU *cpu) + * check that if they both exist then they agree. 
+ */ + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { +- assert(extract32(cpu->id_aa64dfr0, 12, 4) == brps); +- assert(extract32(cpu->id_aa64dfr0, 20, 4) == wrps); +- assert(extract32(cpu->id_aa64dfr0, 28, 4) == ctx_cmps); ++ assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, BRPS) == brps); ++ assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, WRPS) == wrps); ++ assert(FIELD_EX64(cpu->id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) == ctx_cmps); + } + + define_one_arm_cp_reg(cpu, &dbgdidr); +-- +2.25.1 + diff --git a/target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch b/target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch new file mode 100644 index 0000000000000000000000000000000000000000..d6e29be12e1bbf6ef55d43bf35960e2168fc51e8 --- /dev/null +++ b/target-arm-Add-isar_feature-tests-for-PAN-ATS1E1.patch @@ -0,0 +1,77 @@ +From 6f18e959eabf9c752659eb3851f193bf343346c5 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 8 Feb 2020 12:57:59 +0000 +Subject: [PATCH 01/13] target/arm: Add isar_feature tests for PAN + ATS1E1 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Include definitions for all of the bits in ID_MMFR3. +We already have a definition for ID_AA64MMFR1.PAN. 
+ +Reviewed-by: Alex Bennée +Reviewed-by: Peter Maydell +Signed-off-by: Richard Henderson +Message-id: 20200208125816.14954-4-richard.henderson@linaro.org +Signed-off-by: Peter Maydell +--- + target/arm/cpu.h | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 86eb79cd..fe310828 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1680,6 +1680,15 @@ FIELD(ID_ISAR6, FHM, 8, 4) + FIELD(ID_ISAR6, SB, 12, 4) + FIELD(ID_ISAR6, SPECRES, 16, 4) + ++FIELD(ID_MMFR3, CMAINTVA, 0, 4) ++FIELD(ID_MMFR3, CMAINTSW, 4, 4) ++FIELD(ID_MMFR3, BPMAINT, 8, 4) ++FIELD(ID_MMFR3, MAINTBCST, 12, 4) ++FIELD(ID_MMFR3, PAN, 16, 4) ++FIELD(ID_MMFR3, COHWALK, 20, 4) ++FIELD(ID_MMFR3, CMEMSZ, 24, 4) ++FIELD(ID_MMFR3, SUPERSEC, 28, 4) ++ + FIELD(ID_MMFR4, SPECSEI, 0, 4) + FIELD(ID_MMFR4, AC2, 4, 4) + FIELD(ID_MMFR4, XNX, 8, 4) +@@ -3445,6 +3454,16 @@ static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) + return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 4; + } + ++static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) ++{ ++ return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) != 0; ++} ++ ++static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) ++{ ++ return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) >= 2; ++} ++ + /* + * 64-bit feature tests via id registers. 
+ */ +@@ -3589,6 +3608,16 @@ static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; + } + ++static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) ++{ ++ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0; ++} ++ ++static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) ++{ ++ return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; ++} ++ + static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) + { + return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; +-- +2.25.1 + diff --git a/target-arm-Add-more-CPU-features.patch b/target-arm-Add-more-CPU-features.patch new file mode 100644 index 0000000000000000000000000000000000000000..a22e5177300d305df8c0430ee21e29c587bd5399 --- /dev/null +++ b/target-arm-Add-more-CPU-features.patch @@ -0,0 +1,30 @@ +From 3eee1e4ff1ca342e760f759c727abc41780d0afa Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Tue, 11 Aug 2020 10:28:10 +0800 +Subject: [PATCH 9/9] target/arm: Add more CPU features + +Add i8mm, bf16, and dgh CPU features for AArch64. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + target/arm/cpu.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index dcf9f49e..7ae2d3da 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1132,6 +1132,9 @@ static struct CPUFeatureInfo cpu_features[] = { + FIELD_INFO("fhm", ID_ISAR6, FHM, false, 1, 0, true), + FIELD_INFO("sb", ID_ISAR6, SB, false, 1, 0, true), + FIELD_INFO("specres", ID_ISAR6, SPECRES, false, 1, 0, true), ++ FIELD_INFO("i8mm", ID_AA64ISAR1, I8MM, false, 1, 0, false), ++ FIELD_INFO("bf16", ID_AA64ISAR1, BF16, false, 1, 0, false), ++ FIELD_INFO("dgh", ID_AA64ISAR1, DGH, false, 1, 0, false), + + FIELD_INFO("cmaintva", ID_MMFR3, CMAINTVA, false, 1, 0, true), + FIELD_INFO("cmaintsw", ID_MMFR3, CMAINTSW, false, 1, 0, true), +-- +2.25.1 + diff --git a/target-arm-Add-the-kvm_adjvtime-vcpu-property-for-Co.patch b/target-arm-Add-the-kvm_adjvtime-vcpu-property-for-Co.patch new file mode 100644 index 0000000000000000000000000000000000000000..49c7dc63022ec1196b8c225b1c5291fbbe10e1ad --- /dev/null +++ b/target-arm-Add-the-kvm_adjvtime-vcpu-property-for-Co.patch @@ -0,0 +1,27 @@ +From 427975fbc87c3d999ee4d13b65a95ba496c148d6 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Fri, 29 May 2020 11:02:44 +0800 +Subject: [PATCH] target/arm: Add the kvm_adjvtime vcpu property for Cortex-A72 + +Add the kvm_adjvtime vcpu property for ARM Cortex-A72 cpu model, +so that virtual time adjust will be enabled for it. 
+ +Signed-off-by: Ying Fang + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index b30ca7c9..15f4ee92 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -257,6 +257,9 @@ static void aarch64_a72_initfn(Object *obj) + cpu->gic_vpribits = 5; + cpu->gic_vprebits = 5; + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); ++ if(kvm_enabled()) { ++ kvm_arm_add_vcpu_properties(obj); ++ } + } + + static void aarch64_kunpeng_920_initfn(Object *obj) +-- +2.23.0 + diff --git a/target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch b/target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch new file mode 100644 index 0000000000000000000000000000000000000000..81ad2961b00130a741079e0f38c56b7dffdcf803 --- /dev/null +++ b/target-arm-Allow-ID-registers-to-synchronize-to-KVM.patch @@ -0,0 +1,160 @@ +From 79a60f0eeb56faf5d162ca566d1cd9988c3e4d60 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Thu, 6 Aug 2020 16:14:40 +0800 +Subject: [PATCH 4/9] target/arm: Allow ID registers to synchronize to KVM + +There are 2 steps to synchronize the values of system registers from +CPU state to KVM: +1. write to the values of system registers from CPU state to + (index,value) list by write_cpustate_to_list; +2. write the values in (index,value) list to KVM by + write_list_to_kvmstate; + +In step 1, the values of constant system registers are not allowed to +write to (index,value) list. However, a constant system register is +CONSTANT for guest but not for QEMU, which means, QEMU can set/modify +the value of constant system registers that is different from physical +registers when startup. But if KVM is enabled, guest can not read the +values of the system registers which QEMU set unless they can be written +to (index,value) list. And why not try to write to KVM if kvm_sync is +true? + +At the moment we call write_cpustate_to_list, all ID registers are +constant, including ID_PFR1_EL1 and ID_AA64PFR0_EL1 because GIC has been +initialized. 
Hence, let's give all ID registers a chance to write to +KVM. If the write is successful, then write to (index,value) list. + +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + target/arm/helper.c | 31 ++++++++++++++++++++----------- + target/arm/kvm.c | 38 ++++++++++++++++++++++++++++++++++++++ + target/arm/kvm_arm.h | 3 +++ + 3 files changed, 61 insertions(+), 11 deletions(-) + +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 459af431..97b6b861 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -32,6 +32,7 @@ + #include "arm_ldst.h" + #include "exec/cpu_ldst.h" + #endif ++#include "kvm_arm.h" + + #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ + +@@ -267,30 +268,38 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) + ok = false; + continue; + } +- if (ri->type & ARM_CP_NO_RAW) { ++ /* ++ * (Op0, Op1, CRn, CRm, Op2) of ID registers is (3, 0, 0, crm, op2), ++ * where 1<=crm<8, 0<=op2<8. Let's give ID registers a chance to ++ * synchronize to kvm. ++ */ ++ if ((ri->type & ARM_CP_NO_RAW) && !(kvm_sync && ++ ri->opc0 == 3 && ri->opc1 == 0 && ri->crn == 0 && ri->crm > 0)) { + continue; + } + + newval = read_raw_cp_reg(&cpu->env, ri); + if (kvm_sync) { +- /* +- * Only sync if the previous list->cpustate sync succeeded. +- * Rather than tracking the success/failure state for every +- * item in the list, we just recheck "does the raw write we must +- * have made in write_list_to_cpustate() read back OK" here. +- */ +- uint64_t oldval = cpu->cpreg_values[i]; ++ /* Only sync if we can sync to KVM successfully. 
*/ ++ uint64_t oldval; ++ uint64_t kvmval; + ++ if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &oldval)) { ++ continue; ++ } + if (oldval == newval) { + continue; + } + +- write_raw_cp_reg(&cpu->env, ri, oldval); +- if (read_raw_cp_reg(&cpu->env, ri) != oldval) { ++ if (kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &newval)) { ++ continue; ++ } ++ if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &kvmval) || ++ kvmval != newval) { + continue; + } + +- write_raw_cp_reg(&cpu->env, ri, newval); ++ kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &oldval); + } + cpu->cpreg_values[i] = newval; + } +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 4f131f68..229b17ce 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -457,6 +457,44 @@ out: + return ret; + } + ++int kvm_arm_get_one_reg(ARMCPU *cpu, uint64_t regidx, uint64_t *target) ++{ ++ uint32_t v32; ++ int ret; ++ ++ switch (regidx & KVM_REG_SIZE_MASK) { ++ case KVM_REG_SIZE_U32: ++ ret = kvm_get_one_reg(CPU(cpu), regidx, &v32); ++ if (ret == 0) { ++ *target = v32; ++ } ++ return ret; ++ case KVM_REG_SIZE_U64: ++ return kvm_get_one_reg(CPU(cpu), regidx, target); ++ default: ++ return -1; ++ } ++} ++ ++int kvm_arm_set_one_reg(ARMCPU *cpu, uint64_t regidx, uint64_t *source) ++{ ++ uint32_t v32; ++ ++ switch (regidx & KVM_REG_SIZE_MASK) { ++ case KVM_REG_SIZE_U32: ++ v32 = *source; ++ if (v32 != *source) { ++ error_report("the value of source is too large"); ++ return -1; ++ } ++ return kvm_set_one_reg(CPU(cpu), regidx, &v32); ++ case KVM_REG_SIZE_U64: ++ return kvm_set_one_reg(CPU(cpu), regidx, source); ++ default: ++ return -1; ++ } ++} ++ + bool write_kvmstate_to_list(ARMCPU *cpu) + { + CPUState *cs = CPU(cpu); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 0de5f83e..9b7104d6 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -400,4 +400,7 @@ static inline const char *its_class_name(void) + } + } + ++int kvm_arm_get_one_reg(ARMCPU *cpu, uint64_t regidx, uint64_t 
*target); ++int kvm_arm_set_one_reg(ARMCPU *cpu, uint64_t regidx, uint64_t *source); ++ + #endif +-- +2.25.1 + diff --git a/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch b/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch new file mode 100644 index 0000000000000000000000000000000000000000..ca4b796b58600aa35771d26a247690dfca413cc9 --- /dev/null +++ b/target-arm-Allow-reading-flags-from-FPSCR-for-M-prof.patch @@ -0,0 +1,41 @@ +From cdc6896659b85f7ed8f7552850312e55170de0c5 Mon Sep 17 00:00:00 2001 +From: Christophe Lyon +Date: Fri, 25 Oct 2019 11:57:11 +0200 +Subject: [PATCH] target/arm: Allow reading flags from FPSCR for M-profile + +rt==15 is a special case when reading the flags: it means the +destination is APSR. This patch avoids rejecting +vmrs apsr_nzcv, fpscr +as illegal instruction. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Christophe Lyon +Message-id: 20191025095711.10853-1-christophe.lyon@linaro.org +[PMM: updated the comment] +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +(cherry picked from commit 2529ab43b8a05534494704e803e0332d111d8b91) +Signed-off-by: Michael Roth +--- + target/arm/translate-vfp.inc.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c +index ef45cecbea..75406fd9db 100644 +--- a/target/arm/translate-vfp.inc.c ++++ b/target/arm/translate-vfp.inc.c +@@ -704,9 +704,10 @@ static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) + if (arm_dc_feature(s, ARM_FEATURE_M)) { + /* + * The only M-profile VFP vmrs/vmsr sysreg is FPSCR. +- * Writes to R15 are UNPREDICTABLE; we choose to undef. ++ * Accesses to R15 are UNPREDICTABLE; we choose to undef. ++ * (FPSCR -> r15 is a special case which writes to the PSR flags.) 
+ */ +- if (a->rt == 15 || a->reg != ARM_VFP_FPSCR) { ++ if (a->rt == 15 && (!a->l || a->reg != ARM_VFP_FPSCR)) { + return false; + } + } +-- +2.23.0 diff --git a/target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch b/target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch new file mode 100644 index 0000000000000000000000000000000000000000..bfcce54936d4cb8c8ca1de997a6d0d469dab3bc1 --- /dev/null +++ b/target-arm-Define-an-aa32_pmu_8_1-isar-feature-test-.patch @@ -0,0 +1,248 @@ +From 2eded1a4deeb5dd8d28414e54948bcf773f6b540 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 14 Feb 2020 17:51:03 +0000 +Subject: [PATCH 05/13] target/arm: Define an aa32_pmu_8_1 isar feature test + function + +Instead of open-coding a check on the ID_DFR0 PerfMon ID register +field, create a standardly-named isar_feature for "does AArch32 have +a v8.1 PMUv3" and use it. + +This entails moving the id_dfr0 field into the ARMISARegisters struct. + +Reviewed-by: Richard Henderson +Signed-off-by: Peter Maydell +Message-id: 20200214175116.9164-9-peter.maydell@linaro.org +--- + hw/intc/armv7m_nvic.c | 2 +- + target/arm/cpu.c | 26 +++++++++++++------------- + target/arm/cpu.h | 9 ++++++++- + target/arm/cpu64.c | 6 +++--- + target/arm/helper.c | 5 ++--- + 5 files changed, 27 insertions(+), 21 deletions(-) + +diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c +index 9f8f0d3f..0741db7b 100644 +--- a/hw/intc/armv7m_nvic.c ++++ b/hw/intc/armv7m_nvic.c +@@ -1223,7 +1223,7 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) + case 0xd44: /* PFR1. */ + return cpu->id_pfr1; + case 0xd48: /* DFR0. */ +- return cpu->id_dfr0; ++ return cpu->isar.id_dfr0; + case 0xd4c: /* AFR0. */ + return cpu->id_afr0; + case 0xd50: /* MMFR0. 
*/ +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 6ad211b1..7e9b85a2 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1523,7 +1523,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + #endif + } else { + cpu->id_aa64dfr0 = FIELD_DP64(cpu->id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); +- cpu->id_dfr0 = FIELD_DP32(cpu->id_dfr0, ID_DFR0, PERFMON, 0); ++ cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); + cpu->pmceid0 = 0; + cpu->pmceid1 = 0; + } +@@ -1761,7 +1761,7 @@ static void arm1136_r2_initfn(Object *obj) + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; +- cpu->id_dfr0 = 0x2; ++ cpu->isar.id_dfr0 = 0x2; + cpu->id_afr0 = 0x3; + cpu->id_mmfr0 = 0x01130003; + cpu->id_mmfr1 = 0x10030302; +@@ -1793,7 +1793,7 @@ static void arm1136_initfn(Object *obj) + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; +- cpu->id_dfr0 = 0x2; ++ cpu->isar.id_dfr0 = 0x2; + cpu->id_afr0 = 0x3; + cpu->id_mmfr0 = 0x01130003; + cpu->id_mmfr1 = 0x10030302; +@@ -1826,7 +1826,7 @@ static void arm1176_initfn(Object *obj) + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x11; +- cpu->id_dfr0 = 0x33; ++ cpu->isar.id_dfr0 = 0x33; + cpu->id_afr0 = 0; + cpu->id_mmfr0 = 0x01130003; + cpu->id_mmfr1 = 0x10030302; +@@ -1856,7 +1856,7 @@ static void arm11mpcore_initfn(Object *obj) + cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; +- cpu->id_dfr0 = 0; ++ cpu->isar.id_dfr0 = 0; + cpu->id_afr0 = 0x2; + cpu->id_mmfr0 = 0x01100103; + cpu->id_mmfr1 = 0x10020302; +@@ -1888,7 +1888,7 @@ static void cortex_m3_initfn(Object *obj) + cpu->pmsav7_dregion = 8; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000200; +- cpu->id_dfr0 = 0x00100000; ++ cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x00000030; + cpu->id_mmfr1 = 0x00000000; +@@ -1919,7 +1919,7 @@ static void cortex_m4_initfn(Object *obj) + 
cpu->isar.mvfr2 = 0x00000000; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000200; +- cpu->id_dfr0 = 0x00100000; ++ cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x00000030; + cpu->id_mmfr1 = 0x00000000; +@@ -1952,7 +1952,7 @@ static void cortex_m33_initfn(Object *obj) + cpu->isar.mvfr2 = 0x00000040; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000210; +- cpu->id_dfr0 = 0x00200000; ++ cpu->isar.id_dfr0 = 0x00200000; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x00101F40; + cpu->id_mmfr1 = 0x00000000; +@@ -2003,7 +2003,7 @@ static void cortex_r5_initfn(Object *obj) + cpu->midr = 0x411fc153; /* r1p3 */ + cpu->id_pfr0 = 0x0131; + cpu->id_pfr1 = 0x001; +- cpu->id_dfr0 = 0x010400; ++ cpu->isar.id_dfr0 = 0x010400; + cpu->id_afr0 = 0x0; + cpu->id_mmfr0 = 0x0210030; + cpu->id_mmfr1 = 0x00000000; +@@ -2058,7 +2058,7 @@ static void cortex_a8_initfn(Object *obj) + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x1031; + cpu->id_pfr1 = 0x11; +- cpu->id_dfr0 = 0x400; ++ cpu->isar.id_dfr0 = 0x400; + cpu->id_afr0 = 0; + cpu->id_mmfr0 = 0x31100003; + cpu->id_mmfr1 = 0x20000000; +@@ -2131,7 +2131,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x1031; + cpu->id_pfr1 = 0x11; +- cpu->id_dfr0 = 0x000; ++ cpu->isar.id_dfr0 = 0x000; + cpu->id_afr0 = 0; + cpu->id_mmfr0 = 0x00100103; + cpu->id_mmfr1 = 0x20000000; +@@ -2196,7 +2196,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x00001131; + cpu->id_pfr1 = 0x00011011; +- cpu->id_dfr0 = 0x02010555; ++ cpu->isar.id_dfr0 = 0x02010555; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10101105; + cpu->id_mmfr1 = 0x40000000; +@@ -2242,7 +2242,7 @@ static void cortex_a15_initfn(Object *obj) + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x00001131; + cpu->id_pfr1 = 0x00011011; +- cpu->id_dfr0 = 0x02010555; ++ cpu->isar.id_dfr0 = 0x02010555; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10201105; + 
cpu->id_mmfr1 = 0x20000000; +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 91cc02b4..2d8d27e8 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -860,6 +860,7 @@ struct ARMCPU { + uint32_t mvfr0; + uint32_t mvfr1; + uint32_t mvfr2; ++ uint32_t id_dfr0; + uint64_t id_aa64isar0; + uint64_t id_aa64isar1; + uint64_t id_aa64pfr0; +@@ -875,7 +876,6 @@ struct ARMCPU { + uint32_t reset_sctlr; + uint32_t id_pfr0; + uint32_t id_pfr1; +- uint32_t id_dfr0; + uint64_t pmceid0; + uint64_t pmceid1; + uint32_t id_afr0; +@@ -3491,6 +3491,13 @@ static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) + return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) >= 2; + } + ++static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) ++{ ++ /* 0xf means "non-standard IMPDEF PMU" */ ++ return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && ++ FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; ++} ++ + /* + * 64-bit feature tests via id registers. + */ +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 15f4ee92..afdabbeb 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -123,7 +123,7 @@ static void aarch64_a57_initfn(Object *obj) + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; +- cpu->id_dfr0 = 0x03010066; ++ cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10101105; + cpu->id_mmfr1 = 0x40000000; +@@ -177,7 +177,7 @@ static void aarch64_a53_initfn(Object *obj) + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; +- cpu->id_dfr0 = 0x03010066; ++ cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10101105; + cpu->id_mmfr1 = 0x40000000; +@@ -231,7 +231,7 @@ static void aarch64_a72_initfn(Object *obj) + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; +- cpu->id_dfr0 = 0x03010066; ++ cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; + 
cpu->id_mmfr0 = 0x10201105; + cpu->id_mmfr1 = 0x40000000; +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 419be640..3f06ca19 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -5907,7 +5907,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->id_dfr0 }, ++ .resetvalue = cpu->isar.id_dfr0 }, + { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, +@@ -6050,8 +6050,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) + } else { + define_arm_cp_regs(cpu, not_v7_cp_reginfo); + } +- if (FIELD_EX32(cpu->id_dfr0, ID_DFR0, PERFMON) >= 4 && +- FIELD_EX32(cpu->id_dfr0, ID_DFR0, PERFMON) != 0xf) { ++ if (cpu_isar_feature(aa32_pmu_8_1, cpu)) { + ARMCPRegInfo v81_pmu_regs[] = { + { .name = "PMCEID2", .state = ARM_CP_STATE_AA32, + .cp = 15, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 4, +-- +2.25.1 + diff --git a/target-arm-Don-t-abort-on-M-profile-exception-return.patch b/target-arm-Don-t-abort-on-M-profile-exception-return.patch new file mode 100644 index 0000000000000000000000000000000000000000..b6796e25b8b04c76a117ad129cb807a0da93da45 --- /dev/null +++ b/target-arm-Don-t-abort-on-M-profile-exception-return.patch @@ -0,0 +1,103 @@ +From 9027d3fba605d8f6093342ebe4a1da450d374630 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Thu, 22 Aug 2019 14:15:34 +0100 +Subject: [PATCH] target/arm: Don't abort on M-profile exception return in + linux-user mode + +An attempt to do an exception-return (branch to one of the magic +addresses) in linux-user mode for M-profile should behave like +a normal branch, because linux-user mode is always going to be +in 'handler' mode. This used to work, but we broke it when we added +support for the M-profile security extension in commit d02a8698d7ae2bfed. 
+ +In that commit we allowed even handler-mode calls to magic return +values to be checked for and dealt with by causing an +EXCP_EXCEPTION_EXIT exception to be taken, because this is +needed for the FNC_RETURN return-from-non-secure-function-call +handling. For system mode we added a check in do_v7m_exception_exit() +to make any spurious calls from Handler mode behave correctly, but +forgot that linux-user mode would also be affected. + +How an attempted return-from-non-secure-function-call in linux-user +mode should be handled is not clear -- on real hardware it would +result in return to secure code (not to the Linux kernel) which +could then handle the error in any way it chose. For QEMU we take +the simple approach of treating this erroneous return the same way +it would be handled on a CPU without the security extensions -- +treat it as a normal branch. + +The upshot of all this is that for linux-user mode we should never +do any of the bx_excret magic, so the code change is simple. + +This ought to be a weird corner case that only affects broken guest +code (because Linux user processes should never be attempting to do +exception returns or NS function returns), except that the code that +assigns addresses in RAM for the process and stack in our linux-user +code does not attempt to avoid this magic address range, so +legitimate code attempting to return to a trampoline routine on the +stack can fall into this case. This change fixes those programs, +but we should also look at restricting the range of memory we +use for M-profile linux-user guests to the area that would be +real RAM in hardware. 
+ +Cc: qemu-stable@nongnu.org +Reported-by: Christophe Lyon +Reviewed-by: Richard Henderson +Signed-off-by: Peter Maydell +Message-id: 20190822131534.16602-1-peter.maydell@linaro.org +Fixes: https://bugs.launchpad.net/qemu/+bug/1840922 +Signed-off-by: Peter Maydell +(cherry picked from commit 5e5584c89f36b302c666bc6db535fd3f7ff35ad2) +Signed-off-by: Michael Roth +--- + target/arm/translate.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/target/arm/translate.c b/target/arm/translate.c +index 7853462b21..24cb4ba075 100644 +--- a/target/arm/translate.c ++++ b/target/arm/translate.c +@@ -952,10 +952,27 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var) + store_cpu_field(var, thumb); + } + +-/* Set PC and Thumb state from var. var is marked as dead. ++/* ++ * Set PC and Thumb state from var. var is marked as dead. + * For M-profile CPUs, include logic to detect exception-return + * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC, + * and BX reg, and no others, and happens only for code in Handler mode. ++ * The Security Extension also requires us to check for the FNC_RETURN ++ * which signals a function return from non-secure state; this can happen ++ * in both Handler and Thread mode. ++ * To avoid having to do multiple comparisons in inline generated code, ++ * we make the check we do here loose, so it will match for EXC_RETURN ++ * in Thread mode. For system emulation do_v7m_exception_exit() checks ++ * for these spurious cases and returns without doing anything (giving ++ * the same behaviour as for a branch to a non-magic address). ++ * ++ * In linux-user mode it is unclear what the right behaviour for an ++ * attempted FNC_RETURN should be, because in real hardware this will go ++ * directly to Secure code (ie not the Linux kernel) which will then treat ++ * the error in any way it chooses. 
For QEMU we opt to make the FNC_RETURN ++ * attempt behave the way it would on a CPU without the security extension, ++ * which is to say "like a normal branch". That means we can simply treat ++ * all branches as normal with no magic address behaviour. + */ + static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var) + { +@@ -963,10 +980,12 @@ static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var) + * s->base.is_jmp that we need to do the rest of the work later. + */ + gen_bx(s, var); ++#ifndef CONFIG_USER_ONLY + if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) || + (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) { + s->base.is_jmp = DISAS_BX_EXCRET; + } ++#endif + } + + static inline void gen_bx_excret_final_code(DisasContext *s) +-- +2.23.0 diff --git a/target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch b/target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch new file mode 100644 index 0000000000000000000000000000000000000000..d6e82fae2dc02954d37eb723f930bccd28b70618 --- /dev/null +++ b/target-arm-Enable-ARMv8.2-ATS1E1-in-cpu-max.patch @@ -0,0 +1,57 @@ +From 69eedbfc873ded9bf35439b813e9f6a7431dc727 Mon Sep 17 00:00:00 2001 +From: Richard Henderson +Date: Sat, 8 Feb 2020 12:58:12 +0000 +Subject: [PATCH 09/13] target/arm: Enable ARMv8.2-ATS1E1 in -cpu max + +This includes enablement of ARMv8.1-PAN. 
+ +Reviewed-by: Peter Maydell +Signed-off-by: Richard Henderson +Message-id: 20200208125816.14954-17-richard.henderson@linaro.org +Signed-off-by: Peter Maydell +--- + target/arm/cpu.c | 4 ++++ + target/arm/cpu64.c | 5 +++++ + 2 files changed, 9 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index a23c71db..119bd275 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2484,6 +2484,10 @@ static void arm_max_initfn(Object *obj) + t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ + cpu->isar.mvfr2 = t; + ++ t = cpu->id_mmfr3; ++ t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */ ++ cpu->id_mmfr3 = t; ++ + t = cpu->id_mmfr4; + t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ + cpu->id_mmfr4 = t; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 7ad8b5e2..a0d07fd7 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -362,6 +362,7 @@ static void aarch64_max_initfn(Object *obj) + t = cpu->isar.id_aa64mmfr1; + t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* HPD */ + t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1); ++ t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */ + cpu->isar.id_aa64mmfr1 = t; + + /* Replicate the same data to the 32-bit id registers. */ +@@ -382,6 +383,10 @@ static void aarch64_max_initfn(Object *obj) + u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); + cpu->isar.id_isar6 = u; + ++ u = cpu->id_mmfr3; ++ u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */ ++ cpu->id_mmfr3 = u; ++ + /* + * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, + * so do not set MVFR1.FPHP. 
Strictly speaking this is not legal, +-- +2.25.1 + diff --git a/target-arm-Fix-PAuth-sbox-functions.patch b/target-arm-Fix-PAuth-sbox-functions.patch new file mode 100644 index 0000000000000000000000000000000000000000..ac8d05065f766eb8ca90cd00de6a60350c2306c3 --- /dev/null +++ b/target-arm-Fix-PAuth-sbox-functions.patch @@ -0,0 +1,49 @@ +From a7149fc18020c3d432c31838069dcfcb745299bf Mon Sep 17 00:00:00 2001 +From: zhanghailiang +Date: Sat, 20 Jun 2020 12:01:30 +0800 +Subject: [PATCH] target/arm: Fix PAuth sbox functions + +In the PAC computation, sbox was applied over wrong bits. +As this is a 4-bit sbox, bit index should be incremented by 4 instead of 16. + +Test vector from QARMA paper (https://eprint.iacr.org/2016/444.pdf) was +used to verify one computation of the pauth_computepac() function which +uses sbox2. + +Launchpad: https://bugs.launchpad.net/bugs/1859713 +Reviewed-by: Richard Henderson +Signed-off-by: Vincent DEHORS +Signed-off-by: Adrien GRASSEIN +Message-id: 20200116230809.19078-2-richard.henderson@linaro.org +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +Signed-off-by: zhanghailiang +--- + target/arm/pauth_helper.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/arm/pauth_helper.c b/target/arm/pauth_helper.c +index d3194f20..0a5f41e1 100644 +--- a/target/arm/pauth_helper.c ++++ b/target/arm/pauth_helper.c +@@ -89,7 +89,7 @@ static uint64_t pac_sub(uint64_t i) + uint64_t o = 0; + int b; + +- for (b = 0; b < 64; b += 16) { ++ for (b = 0; b < 64; b += 4) { + o |= (uint64_t)sub[(i >> b) & 0xf] << b; + } + return o; +@@ -104,7 +104,7 @@ static uint64_t pac_inv_sub(uint64_t i) + uint64_t o = 0; + int b; + +- for (b = 0; b < 64; b += 16) { ++ for (b = 0; b < 64; b += 4) { + o |= (uint64_t)inv_sub[(i >> b) & 0xf] << b; + } + return o; +-- +2.23.0 + diff --git a/target-arm-Fix-write-redundant-values-to-kvm.patch b/target-arm-Fix-write-redundant-values-to-kvm.patch new file mode 100644 index 
0000000000000000000000000000000000000000..e165d04bafdd3fa3ceca0a1c0af68dfc0bb95df4 --- /dev/null +++ b/target-arm-Fix-write-redundant-values-to-kvm.patch @@ -0,0 +1,118 @@ +From 479c384f2944f52f9199bffa191b587a3f02663c Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Wed, 9 Dec 2020 19:35:08 +0800 +Subject: [PATCH] target/arm: Fix write redundant values to kvm + +After modifying the value of an ID register, we'd better try to write +it to KVM so that we can know the value is acceptable for KVM. +Because it may modify the registers' values of KVM, it's not suitable +for other registers. + +(cherry-picked from a0d7a9de807639fcfcbe1fe037cb8772d459a9cf) +Signed-off-by: Peng Liang +--- + target/arm/helper.c | 73 ++++++++++++++++++++++++++++++--------------- + 1 file changed, 49 insertions(+), 24 deletions(-) + +diff --git a/target/arm/helper.c b/target/arm/helper.c +index b262f5d6c5..bddd355fa0 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -252,6 +252,16 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri) + return true; + } + ++static bool is_id_reg(const ARMCPRegInfo *ri) ++{ ++ /* ++ * (Op0, Op1, CRn, CRm, Op2) of ID registers is (3, 0, 0, crm, op2), ++ * where 1<=crm<8, 0<=op2<8. ++ */ ++ return ri->opc0 == 3 && ri->opc1 == 0 && ri->crn == 0 && ++ ri->crm > 0 && ri->crm < 8; ++} ++ + bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) + { + /* Write the coprocessor state from cpu->env to the (index,value) list. */ +@@ -268,38 +278,53 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync) + ok = false; + continue; + } +- /* +- * (Op0, Op1, CRn, CRm, Op2) of ID registers is (3, 0, 0, crm, op2), +- * where 1<=crm<8, 0<=op2<8. Let's give ID registers a chance to +- * synchronize to kvm. 
+- */ +- if ((ri->type & ARM_CP_NO_RAW) && !(kvm_sync && +- ri->opc0 == 3 && ri->opc1 == 0 && ri->crn == 0 && ri->crm > 0)) { ++ if ((ri->type & ARM_CP_NO_RAW) && !(kvm_sync && is_id_reg(ri))) { + continue; + } + + newval = read_raw_cp_reg(&cpu->env, ri); + if (kvm_sync) { +- /* Only sync if we can sync to KVM successfully. */ +- uint64_t oldval; +- uint64_t kvmval; ++ if (is_id_reg(ri)) { ++ /* Only sync if we can sync to KVM successfully. */ ++ uint64_t oldval; ++ uint64_t kvmval; + +- if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &oldval)) { +- continue; +- } +- if (oldval == newval) { +- continue; +- } ++ if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &oldval)) { ++ continue; ++ } ++ if (oldval == newval) { ++ continue; ++ } + +- if (kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &newval)) { +- continue; +- } +- if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &kvmval) || +- kvmval != newval) { +- continue; +- } ++ if (kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &newval)) { ++ continue; ++ } ++ if (kvm_arm_get_one_reg(cpu, cpu->cpreg_indexes[i], &kvmval) || ++ kvmval != newval) { ++ continue; ++ } ++ ++ kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &oldval); ++ } else { ++ /* ++ * Only sync if the previous list->cpustate sync succeeded. ++ * Rather than tracking the success/failure state for every ++ * item in the list, we just recheck "does the raw write we must ++ * have made in write_list_to_cpustate() read back OK" here. 
++ */ ++ uint64_t oldval = cpu->cpreg_values[i]; ++ ++ if (oldval == newval) { ++ continue; ++ } + +- kvm_arm_set_one_reg(cpu, cpu->cpreg_indexes[i], &oldval); ++ write_raw_cp_reg(&cpu->env, ri, oldval); ++ if (read_raw_cp_reg(&cpu->env, ri) != oldval) { ++ continue; ++ } ++ ++ write_raw_cp_reg(&cpu->env, ri, newval); ++ } + } + cpu->cpreg_values[i] = newval; + } +-- +2.27.0 + diff --git a/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch b/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch new file mode 100644 index 0000000000000000000000000000000000000000..a46232f8ba04e1e2a956d8493dc1515fcf1f272a --- /dev/null +++ b/target-arm-Free-TCG-temps-in-trans_VMOV_64_sp.patch @@ -0,0 +1,40 @@ +From 38fb634853ac6547326d9f88b9a068d9fc6b4ad4 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Tue, 27 Aug 2019 13:19:31 +0100 +Subject: [PATCH] target/arm: Free TCG temps in trans_VMOV_64_sp() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The function neon_store_reg32() doesn't free the TCG temp that it +is passed, so the caller must do that. We got this right in most +places but forgot to free the TCG temps in trans_VMOV_64_sp(). 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20190827121931.26836-1-peter.maydell@linaro.org +(cherry picked from commit 342d27581bd3ecdb995e4fc55fcd383cf3242888) +Signed-off-by: Michael Roth +--- + target/arm/translate-vfp.inc.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c +index 092eb5ec53..ef45cecbea 100644 +--- a/target/arm/translate-vfp.inc.c ++++ b/target/arm/translate-vfp.inc.c +@@ -881,8 +881,10 @@ static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a) + /* gpreg to fpreg */ + tmp = load_reg(s, a->rt); + neon_store_reg32(tmp, a->vm); ++ tcg_temp_free_i32(tmp); + tmp = load_reg(s, a->rt2); + neon_store_reg32(tmp, a->vm + 1); ++ tcg_temp_free_i32(tmp); + } + + return true; +-- +2.23.0 diff --git a/target-arm-Move-DBGDIDR-into-ARMISARegisters.patch b/target-arm-Move-DBGDIDR-into-ARMISARegisters.patch new file mode 100644 index 0000000000000000000000000000000000000000..e7f2833ee889363902a3e063bffbbff4b4e2c6af --- /dev/null +++ b/target-arm-Move-DBGDIDR-into-ARMISARegisters.patch @@ -0,0 +1,158 @@ +From df641941e6fd7fef78e5c77c9a809a7a8e148589 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 14 Feb 2020 17:51:06 +0000 +Subject: [PATCH 08/13] target/arm: Move DBGDIDR into ARMISARegisters + +We're going to want to read the DBGDIDR register from KVM in +a subsequent commit, which means it needs to be in the +ARMISARegisters sub-struct. Move it. 
+ +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20200214175116.9164-12-peter.maydell@linaro.org +--- + target/arm/cpu.c | 8 ++++---- + target/arm/cpu.h | 2 +- + target/arm/cpu64.c | 6 +++--- + target/arm/helper.c | 2 +- + target/arm/internals.h | 6 +++--- + 5 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index bb2edf4e..a23c71db 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2070,7 +2070,7 @@ static void cortex_a8_initfn(Object *obj) + cpu->isar.id_isar2 = 0x21232031; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x00111142; +- cpu->dbgdidr = 0x15141000; ++ cpu->isar.dbgdidr = 0x15141000; + cpu->clidr = (1 << 27) | (2 << 24) | 3; + cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. */ + cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ +@@ -2143,7 +2143,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x00111142; +- cpu->dbgdidr = 0x35141000; ++ cpu->isar.dbgdidr = 0x35141000; + cpu->clidr = (1 << 27) | (1 << 24) | 3; + cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ +@@ -2211,7 +2211,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x10011142; +- cpu->dbgdidr = 0x3515f005; ++ cpu->isar.dbgdidr = 0x3515f005; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ +@@ -2254,7 +2254,7 @@ static void cortex_a15_initfn(Object *obj) + cpu->isar.id_isar2 = 0x21232041; + cpu->isar.id_isar3 = 0x11112131; + cpu->isar.id_isar4 = 0x10011142; +- cpu->dbgdidr = 0x3515f021; ++ cpu->isar.dbgdidr = 0x3515f021; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 4b1ae32b..3040aa40 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -861,6 +861,7 @@ struct ARMCPU { + uint32_t mvfr1; + uint32_t mvfr2; + uint32_t id_dfr0; ++ uint32_t dbgdidr; + uint64_t id_aa64isar0; + uint64_t id_aa64isar1; + uint64_t id_aa64pfr0; +@@ -888,7 +889,6 @@ struct ARMCPU { + uint32_t id_mmfr4; + uint64_t id_aa64afr0; + uint64_t id_aa64afr1; +- uint32_t dbgdidr; + uint32_t clidr; + uint64_t mp_affinity; /* MP ID without feature bits */ + /* The elements of this array are the CCSIDR values for each cache, +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index aa96548f..7ad8b5e2 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -140,7 +140,7 @@ static void aarch64_a57_initfn(Object *obj) + cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64mmfr0 = 0x00001124; +- cpu->dbgdidr = 0x3516d000; ++ cpu->isar.dbgdidr = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ +@@ -194,7 +194,7 @@ static void aarch64_a53_initfn(Object *obj) + cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + 
cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ +- cpu->dbgdidr = 0x3516d000; ++ cpu->isar.dbgdidr = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32KB L1 icache */ +@@ -247,7 +247,7 @@ static void aarch64_a72_initfn(Object *obj) + cpu->isar.id_aa64dfr0 = 0x10305106; + cpu->isar.id_aa64isar0 = 0x00011120; + cpu->isar.id_aa64mmfr0 = 0x00001124; +- cpu->dbgdidr = 0x3516d000; ++ cpu->isar.dbgdidr = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ +diff --git a/target/arm/helper.c b/target/arm/helper.c +index c1ff4b6b..60ff7c0f 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -5597,7 +5597,7 @@ static void define_debug_regs(ARMCPU *cpu) + ARMCPRegInfo dbgdidr = { + .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL0_R, .accessfn = access_tda, +- .type = ARM_CP_CONST, .resetvalue = cpu->dbgdidr, ++ .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr, + }; + + /* Note that all these register fields hold "number of Xs minus 1". 
*/ +diff --git a/target/arm/internals.h b/target/arm/internals.h +index a72d0a6c..1d01ecc4 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -867,7 +867,7 @@ static inline int arm_num_brps(ARMCPU *cpu) + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; + } else { +- return FIELD_EX32(cpu->dbgdidr, DBGDIDR, BRPS) + 1; ++ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1; + } + } + +@@ -881,7 +881,7 @@ static inline int arm_num_wrps(ARMCPU *cpu) + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; + } else { +- return FIELD_EX32(cpu->dbgdidr, DBGDIDR, WRPS) + 1; ++ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1; + } + } + +@@ -895,7 +895,7 @@ static inline int arm_num_ctx_cmps(ARMCPU *cpu) + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { + return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; + } else { +- return FIELD_EX32(cpu->dbgdidr, DBGDIDR, CTX_CMPS) + 1; ++ return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1; + } + } + +-- +2.25.1 + diff --git a/target-arm-Read-debug-related-ID-registers-from-KVM.patch b/target-arm-Read-debug-related-ID-registers-from-KVM.patch new file mode 100644 index 0000000000000000000000000000000000000000..1be7cd1713a456204efcb412c1d76398991ef77d --- /dev/null +++ b/target-arm-Read-debug-related-ID-registers-from-KVM.patch @@ -0,0 +1,131 @@ +From 9cda8af5af9e95e7b0ff683d0fb661c1ffcba8d8 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 14 Feb 2020 17:51:07 +0000 +Subject: [PATCH 11/13] target/arm: Read debug-related ID registers from KVM + +Now we have isar_feature test functions that look at fields in the +ID_AA64DFR0_EL1 and ID_DFR0 ID registers, add the code that reads +these register values from KVM so that the checks behave correctly +when we're using KVM. 
+ +No isar_feature function tests ID_AA64DFR1_EL1 or DBGDIDR yet, but we +add it to maintain the invariant that every field in the +ARMISARegisters struct is populated for a KVM CPU and can be relied +on. This requirement isn't actually written down yet, so add a note +to the relevant comment. + +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20200214175116.9164-13-peter.maydell@linaro.org +--- + target/arm/cpu.h | 5 +++++ + target/arm/kvm32.c | 8 ++++++++ + target/arm/kvm64.c | 36 ++++++++++++++++++++++++++++++++++++ + 3 files changed, 49 insertions(+) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index a78c30c3..56d8cd8c 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -848,6 +848,11 @@ struct ARMCPU { + * prefix means a constant register. + * Some of these registers are split out into a substructure that + * is shared with the translators to control the ISA. ++ * ++ * Note that if you add an ID register to the ARMISARegisters struct ++ * you need to also update the 32-bit and 64-bit versions of the ++ * kvm_arm_get_host_cpu_features() function to correctly populate the ++ * field by reading the value from the KVM vCPU. + */ + struct ARMISARegisters { + uint32_t id_isar0; +diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c +index 2247148e..e984d52d 100644 +--- a/target/arm/kvm32.c ++++ b/target/arm/kvm32.c +@@ -93,6 +93,9 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + ahcf->isar.id_isar6 = 0; + } + ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, ++ ARM_CP15_REG32(0, 0, 1, 2)); ++ + err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, + KVM_REG_ARM | KVM_REG_SIZE_U32 | + KVM_REG_ARM_VFP | KVM_REG_ARM_VFP_MVFR0); +@@ -121,6 +124,11 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + ahcf->isar.id_mmfr4 = 0; + } + ++ /* ++ * There is no way to read DBGDIDR, because currently 32-bit KVM ++ * doesn't implement debug at all. Leave it at zero. 
++ */ ++ + kvm_arm_destroy_scratch_host_vcpu(fdarray); + + if (err < 0) { +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 276d1466..2a88b8df 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -533,6 +533,10 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + } else { + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, + ARM64_SYS_REG(3, 0, 0, 4, 1)); ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, ++ ARM64_SYS_REG(3, 0, 0, 5, 0)); ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, ++ ARM64_SYS_REG(3, 0, 0, 5, 1)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, + ARM64_SYS_REG(3, 0, 0, 6, 0)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, +@@ -551,6 +555,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + * than skipping the reads and leaving 0, as we must avoid + * considering the values in every case. + */ ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, ++ ARM64_SYS_REG(3, 0, 0, 1, 2)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, + ARM64_SYS_REG(3, 0, 0, 1, 4)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, +@@ -582,6 +588,36 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + ARM64_SYS_REG(3, 0, 0, 3, 1)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, + ARM64_SYS_REG(3, 0, 0, 3, 2)); ++ ++ /* ++ * DBGDIDR is a bit complicated because the kernel doesn't ++ * provide an accessor for it in 64-bit mode, which is what this ++ * scratch VM is in, and there's no architected "64-bit sysreg ++ * which reads the same as the 32-bit register" the way there is ++ * for other ID registers. Instead we synthesize a value from the ++ * AArch64 ID_AA64DFR0, the same way the kernel code in ++ * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. ++ * We only do this if the CPU supports AArch32 at EL1. 
++ */ ++ if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { ++ int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); ++ int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); ++ int ctx_cmps = ++ FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); ++ int version = 6; /* ARMv8 debug architecture */ ++ bool has_el3 = ++ !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); ++ uint32_t dbgdidr = 0; ++ ++ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); ++ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps); ++ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps); ++ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version); ++ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); ++ dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); ++ dbgdidr |= (1 << 15); /* RES1 bit */ ++ ahcf->isar.dbgdidr = dbgdidr; ++ } + } + + kvm_arm_destroy_scratch_host_vcpu(fdarray); +-- +2.25.1 + diff --git a/target-arm-Stop-assuming-DBGDIDR-always-exists.patch b/target-arm-Stop-assuming-DBGDIDR-always-exists.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7648c5430ebd61b2267a184ad16828dd8d25015 --- /dev/null +++ b/target-arm-Stop-assuming-DBGDIDR-always-exists.patch @@ -0,0 +1,186 @@ +From 1d4d4cda9637ec09f8cf30785f68b58cd46815c8 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 14 Feb 2020 17:51:05 +0000 +Subject: [PATCH 07/13] target/arm: Stop assuming DBGDIDR always exists + +The AArch32 DBGDIDR defines properties like the number of +breakpoints, watchpoints and context-matching comparators. On an +AArch64 CPU, the register may not even exist if AArch32 is not +supported at EL1. + +Currently we hard-code use of DBGDIDR to identify the number of +breakpoints etc; this works for all our TCG CPUs, but will break if +we ever add an AArch64-only CPU. 
We also have an assert() that the +AArch32 and AArch64 registers match, which currently works only by +luck for KVM because we don't populate either of these ID registers +from the KVM vCPU and so they are both zero. + +Clean this up so we have functions for finding the number +of breakpoints, watchpoints and context comparators which look +in the appropriate ID register. + +This allows us to drop the "check that AArch64 and AArch32 agree +on the number of breakpoints etc" asserts: + * we no longer look at the AArch32 versions unless that's the + right place to be looking + * it's valid to have a CPU (eg AArch64-only) where they don't match + * we shouldn't have been asserting the validity of ID registers + in a codepath used with KVM anyway + +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20200214175116.9164-11-peter.maydell@linaro.org +--- + target/arm/cpu.h | 7 +++++++ + target/arm/debug_helper.c | 6 +++--- + target/arm/helper.c | 21 +++++--------------- + target/arm/internals.h | 42 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 57 insertions(+), 19 deletions(-) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 230130be..4b1ae32b 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1798,6 +1798,13 @@ FIELD(ID_DFR0, MPROFDBG, 20, 4) + FIELD(ID_DFR0, PERFMON, 24, 4) + FIELD(ID_DFR0, TRACEFILT, 28, 4) + ++FIELD(DBGDIDR, SE_IMP, 12, 1) ++FIELD(DBGDIDR, NSUHD_IMP, 14, 1) ++FIELD(DBGDIDR, VERSION, 16, 4) ++FIELD(DBGDIDR, CTX_CMPS, 20, 4) ++FIELD(DBGDIDR, BRPS, 24, 4) ++FIELD(DBGDIDR, WRPS, 28, 4) ++ + FIELD(MVFR0, SIMDREG, 0, 4) + FIELD(MVFR0, FPSP, 4, 4) + FIELD(MVFR0, FPDP, 8, 4) +diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c +index dde80273..3f8f667d 100644 +--- a/target/arm/debug_helper.c ++++ b/target/arm/debug_helper.c +@@ -16,8 +16,8 @@ static bool linked_bp_matches(ARMCPU *cpu, int lbn) + { + CPUARMState *env = &cpu->env; + uint64_t bcr = env->cp15.dbgbcr[lbn]; +- int brps = 
extract32(cpu->dbgdidr, 24, 4); +- int ctx_cmps = extract32(cpu->dbgdidr, 20, 4); ++ int brps = arm_num_brps(cpu); ++ int ctx_cmps = arm_num_ctx_cmps(cpu); + int bt; + uint32_t contextidr; + +@@ -28,7 +28,7 @@ static bool linked_bp_matches(ARMCPU *cpu, int lbn) + * case DBGWCR_EL1.LBN must indicate that breakpoint). + * We choose the former. + */ +- if (lbn > brps || lbn < (brps - ctx_cmps)) { ++ if (lbn >= brps || lbn < (brps - ctx_cmps)) { + return false; + } + +diff --git a/target/arm/helper.c b/target/arm/helper.c +index a71f4ef6..c1ff4b6b 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -5601,23 +5601,12 @@ static void define_debug_regs(ARMCPU *cpu) + }; + + /* Note that all these register fields hold "number of Xs minus 1". */ +- brps = extract32(cpu->dbgdidr, 24, 4); +- wrps = extract32(cpu->dbgdidr, 28, 4); +- ctx_cmps = extract32(cpu->dbgdidr, 20, 4); ++ brps = arm_num_brps(cpu); ++ wrps = arm_num_wrps(cpu); ++ ctx_cmps = arm_num_ctx_cmps(cpu); + + assert(ctx_cmps <= brps); + +- /* The DBGDIDR and ID_AA64DFR0_EL1 define various properties +- * of the debug registers such as number of breakpoints; +- * check that if they both exist then they agree. 
+- */ +- if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { +- assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) == brps); +- assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) == wrps); +- assert(FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) +- == ctx_cmps); +- } +- + define_one_arm_cp_reg(cpu, &dbgdidr); + define_arm_cp_regs(cpu, debug_cp_reginfo); + +@@ -5625,7 +5614,7 @@ static void define_debug_regs(ARMCPU *cpu) + define_arm_cp_regs(cpu, debug_lpae_cp_reginfo); + } + +- for (i = 0; i < brps + 1; i++) { ++ for (i = 0; i < brps; i++) { + ARMCPRegInfo dbgregs[] = { + { .name = "DBGBVR", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 4, +@@ -5644,7 +5633,7 @@ static void define_debug_regs(ARMCPU *cpu) + define_arm_cp_regs(cpu, dbgregs); + } + +- for (i = 0; i < wrps + 1; i++) { ++ for (i = 0; i < wrps; i++) { + ARMCPRegInfo dbgregs[] = { + { .name = "DBGWVR", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = i, .opc2 = 6, +diff --git a/target/arm/internals.h b/target/arm/internals.h +index 232d9638..a72d0a6c 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -857,6 +857,48 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env) + } + } + ++/** ++ * arm_num_brps: Return number of implemented breakpoints. ++ * Note that the ID register BRPS field is "number of bps - 1", ++ * and we return the actual number of breakpoints. ++ */ ++static inline int arm_num_brps(ARMCPU *cpu) ++{ ++ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { ++ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; ++ } else { ++ return FIELD_EX32(cpu->dbgdidr, DBGDIDR, BRPS) + 1; ++ } ++} ++ ++/** ++ * arm_num_wrps: Return number of implemented watchpoints. ++ * Note that the ID register WRPS field is "number of wps - 1", ++ * and we return the actual number of watchpoints. 
++ */ ++static inline int arm_num_wrps(ARMCPU *cpu) ++{ ++ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { ++ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; ++ } else { ++ return FIELD_EX32(cpu->dbgdidr, DBGDIDR, WRPS) + 1; ++ } ++} ++ ++/** ++ * arm_num_ctx_cmps: Return number of implemented context comparators. ++ * Note that the ID register CTX_CMPS field is "number of cmps - 1", ++ * and we return the actual number of comparators. ++ */ ++static inline int arm_num_ctx_cmps(ARMCPU *cpu) ++{ ++ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { ++ return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; ++ } else { ++ return FIELD_EX32(cpu->dbgdidr, DBGDIDR, CTX_CMPS) + 1; ++ } ++} ++ + /* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3. + * Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits. + */ +-- +2.25.1 + diff --git a/target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch b/target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch new file mode 100644 index 0000000000000000000000000000000000000000..ecbaf7750c42aab1efade6d50e53fd7e92762883 --- /dev/null +++ b/target-arm-Test-correct-register-in-aa32_pan-and-aa3.patch @@ -0,0 +1,453 @@ +From 2bc630dc858bd0c010b7c375ebf1e8f4b4e0e346 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 14 Feb 2020 17:51:13 +0000 +Subject: [PATCH 10/13] target/arm: Test correct register in aa32_pan and + aa32_ats1e1 checks + +The isar_feature_aa32_pan and isar_feature_aa32_ats1e1 functions +are supposed to be testing fields in ID_MMFR3; but a cut-and-paste +error meant we were looking at MVFR0 instead. + +Fix the functions to look at the right register; this requires +us to move at least id_mmfr3 to the ARMISARegisters struct; we +choose to move all the ID_MMFRn registers for consistency. 
+ +Fixes: 3d6ad6bb466f +Signed-off-by: Peter Maydell +Reviewed-by: Richard Henderson +Message-id: 20200214175116.9164-19-peter.maydell@linaro.org +--- + hw/intc/armv7m_nvic.c | 8 ++-- + target/arm/cpu.c | 96 +++++++++++++++++++++---------------------- + target/arm/cpu.h | 14 +++---- + target/arm/cpu64.c | 28 ++++++------- + target/arm/helper.c | 12 +++--- + target/arm/kvm32.c | 17 ++++++++ + target/arm/kvm64.c | 10 +++++ + 7 files changed, 106 insertions(+), 79 deletions(-) + +diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c +index 0741db7b..f7ef6ad1 100644 +--- a/hw/intc/armv7m_nvic.c ++++ b/hw/intc/armv7m_nvic.c +@@ -1227,13 +1227,13 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) + case 0xd4c: /* AFR0. */ + return cpu->id_afr0; + case 0xd50: /* MMFR0. */ +- return cpu->id_mmfr0; ++ return cpu->isar.id_mmfr0; + case 0xd54: /* MMFR1. */ +- return cpu->id_mmfr1; ++ return cpu->isar.id_mmfr1; + case 0xd58: /* MMFR2. */ +- return cpu->id_mmfr2; ++ return cpu->isar.id_mmfr2; + case 0xd5c: /* MMFR3. */ +- return cpu->id_mmfr3; ++ return cpu->isar.id_mmfr3; + case 0xd60: /* ISAR0. */ + return cpu->isar.id_isar0; + case 0xd64: /* ISAR1. 
*/ +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 119bd275..c3728e3d 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1764,9 +1764,9 @@ static void arm1136_r2_initfn(Object *obj) + cpu->id_pfr1 = 0x1; + cpu->isar.id_dfr0 = 0x2; + cpu->id_afr0 = 0x3; +- cpu->id_mmfr0 = 0x01130003; +- cpu->id_mmfr1 = 0x10030302; +- cpu->id_mmfr2 = 0x01222110; ++ cpu->isar.id_mmfr0 = 0x01130003; ++ cpu->isar.id_mmfr1 = 0x10030302; ++ cpu->isar.id_mmfr2 = 0x01222110; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; +@@ -1796,9 +1796,9 @@ static void arm1136_initfn(Object *obj) + cpu->id_pfr1 = 0x1; + cpu->isar.id_dfr0 = 0x2; + cpu->id_afr0 = 0x3; +- cpu->id_mmfr0 = 0x01130003; +- cpu->id_mmfr1 = 0x10030302; +- cpu->id_mmfr2 = 0x01222110; ++ cpu->isar.id_mmfr0 = 0x01130003; ++ cpu->isar.id_mmfr1 = 0x10030302; ++ cpu->isar.id_mmfr2 = 0x01222110; + cpu->isar.id_isar0 = 0x00140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231111; +@@ -1829,9 +1829,9 @@ static void arm1176_initfn(Object *obj) + cpu->id_pfr1 = 0x11; + cpu->isar.id_dfr0 = 0x33; + cpu->id_afr0 = 0; +- cpu->id_mmfr0 = 0x01130003; +- cpu->id_mmfr1 = 0x10030302; +- cpu->id_mmfr2 = 0x01222100; ++ cpu->isar.id_mmfr0 = 0x01130003; ++ cpu->isar.id_mmfr1 = 0x10030302; ++ cpu->isar.id_mmfr2 = 0x01222100; + cpu->isar.id_isar0 = 0x0140011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11231121; +@@ -1859,9 +1859,9 @@ static void arm11mpcore_initfn(Object *obj) + cpu->id_pfr1 = 0x1; + cpu->isar.id_dfr0 = 0; + cpu->id_afr0 = 0x2; +- cpu->id_mmfr0 = 0x01100103; +- cpu->id_mmfr1 = 0x10020302; +- cpu->id_mmfr2 = 0x01222000; ++ cpu->isar.id_mmfr0 = 0x01100103; ++ cpu->isar.id_mmfr1 = 0x10020302; ++ cpu->isar.id_mmfr2 = 0x01222000; + cpu->isar.id_isar0 = 0x00100011; + cpu->isar.id_isar1 = 0x12002111; + cpu->isar.id_isar2 = 0x11221011; +@@ -1891,10 +1891,10 @@ static void cortex_m3_initfn(Object *obj) + cpu->id_pfr1 = 0x00000200; + 
cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; +- cpu->id_mmfr0 = 0x00000030; +- cpu->id_mmfr1 = 0x00000000; +- cpu->id_mmfr2 = 0x00000000; +- cpu->id_mmfr3 = 0x00000000; ++ cpu->isar.id_mmfr0 = 0x00000030; ++ cpu->isar.id_mmfr1 = 0x00000000; ++ cpu->isar.id_mmfr2 = 0x00000000; ++ cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; +@@ -1922,10 +1922,10 @@ static void cortex_m4_initfn(Object *obj) + cpu->id_pfr1 = 0x00000200; + cpu->isar.id_dfr0 = 0x00100000; + cpu->id_afr0 = 0x00000000; +- cpu->id_mmfr0 = 0x00000030; +- cpu->id_mmfr1 = 0x00000000; +- cpu->id_mmfr2 = 0x00000000; +- cpu->id_mmfr3 = 0x00000000; ++ cpu->isar.id_mmfr0 = 0x00000030; ++ cpu->isar.id_mmfr1 = 0x00000000; ++ cpu->isar.id_mmfr2 = 0x00000000; ++ cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01141110; + cpu->isar.id_isar1 = 0x02111000; + cpu->isar.id_isar2 = 0x21112231; +@@ -1955,10 +1955,10 @@ static void cortex_m33_initfn(Object *obj) + cpu->id_pfr1 = 0x00000210; + cpu->isar.id_dfr0 = 0x00200000; + cpu->id_afr0 = 0x00000000; +- cpu->id_mmfr0 = 0x00101F40; +- cpu->id_mmfr1 = 0x00000000; +- cpu->id_mmfr2 = 0x01000000; +- cpu->id_mmfr3 = 0x00000000; ++ cpu->isar.id_mmfr0 = 0x00101F40; ++ cpu->isar.id_mmfr1 = 0x00000000; ++ cpu->isar.id_mmfr2 = 0x01000000; ++ cpu->isar.id_mmfr3 = 0x00000000; + cpu->isar.id_isar0 = 0x01101110; + cpu->isar.id_isar1 = 0x02212000; + cpu->isar.id_isar2 = 0x20232232; +@@ -2006,10 +2006,10 @@ static void cortex_r5_initfn(Object *obj) + cpu->id_pfr1 = 0x001; + cpu->isar.id_dfr0 = 0x010400; + cpu->id_afr0 = 0x0; +- cpu->id_mmfr0 = 0x0210030; +- cpu->id_mmfr1 = 0x00000000; +- cpu->id_mmfr2 = 0x01200000; +- cpu->id_mmfr3 = 0x0211; ++ cpu->isar.id_mmfr0 = 0x0210030; ++ cpu->isar.id_mmfr1 = 0x00000000; ++ cpu->isar.id_mmfr2 = 0x01200000; ++ cpu->isar.id_mmfr3 = 0x0211; + cpu->isar.id_isar0 = 0x02101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 
0x21232141; +@@ -2061,10 +2061,10 @@ static void cortex_a8_initfn(Object *obj) + cpu->id_pfr1 = 0x11; + cpu->isar.id_dfr0 = 0x400; + cpu->id_afr0 = 0; +- cpu->id_mmfr0 = 0x31100003; +- cpu->id_mmfr1 = 0x20000000; +- cpu->id_mmfr2 = 0x01202000; +- cpu->id_mmfr3 = 0x11; ++ cpu->isar.id_mmfr0 = 0x31100003; ++ cpu->isar.id_mmfr1 = 0x20000000; ++ cpu->isar.id_mmfr2 = 0x01202000; ++ cpu->isar.id_mmfr3 = 0x11; + cpu->isar.id_isar0 = 0x00101111; + cpu->isar.id_isar1 = 0x12112111; + cpu->isar.id_isar2 = 0x21232031; +@@ -2134,10 +2134,10 @@ static void cortex_a9_initfn(Object *obj) + cpu->id_pfr1 = 0x11; + cpu->isar.id_dfr0 = 0x000; + cpu->id_afr0 = 0; +- cpu->id_mmfr0 = 0x00100103; +- cpu->id_mmfr1 = 0x20000000; +- cpu->id_mmfr2 = 0x01230000; +- cpu->id_mmfr3 = 0x00002111; ++ cpu->isar.id_mmfr0 = 0x00100103; ++ cpu->isar.id_mmfr1 = 0x20000000; ++ cpu->isar.id_mmfr2 = 0x01230000; ++ cpu->isar.id_mmfr3 = 0x00002111; + cpu->isar.id_isar0 = 0x00101111; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; +@@ -2199,10 +2199,10 @@ static void cortex_a7_initfn(Object *obj) + cpu->id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x02010555; + cpu->id_afr0 = 0x00000000; +- cpu->id_mmfr0 = 0x10101105; +- cpu->id_mmfr1 = 0x40000000; +- cpu->id_mmfr2 = 0x01240000; +- cpu->id_mmfr3 = 0x02102211; ++ cpu->isar.id_mmfr0 = 0x10101105; ++ cpu->isar.id_mmfr1 = 0x40000000; ++ cpu->isar.id_mmfr2 = 0x01240000; ++ cpu->isar.id_mmfr3 = 0x02102211; + /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but + * table 4-41 gives 0x02101110, which includes the arm div insns. 
+ */ +@@ -2245,10 +2245,10 @@ static void cortex_a15_initfn(Object *obj) + cpu->id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x02010555; + cpu->id_afr0 = 0x00000000; +- cpu->id_mmfr0 = 0x10201105; +- cpu->id_mmfr1 = 0x20000000; +- cpu->id_mmfr2 = 0x01240000; +- cpu->id_mmfr3 = 0x02102211; ++ cpu->isar.id_mmfr0 = 0x10201105; ++ cpu->isar.id_mmfr1 = 0x20000000; ++ cpu->isar.id_mmfr2 = 0x01240000; ++ cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232041; +@@ -2484,13 +2484,13 @@ static void arm_max_initfn(Object *obj) + t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ + cpu->isar.mvfr2 = t; + +- t = cpu->id_mmfr3; ++ t = cpu->isar.id_mmfr3; + t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */ +- cpu->id_mmfr3 = t; ++ cpu->isar.id_mmfr3 = t; + +- t = cpu->id_mmfr4; ++ t = cpu->isar.id_mmfr4; + t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ +- cpu->id_mmfr4 = t; ++ cpu->isar.id_mmfr4 = t; + } + #endif + } +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 3040aa40..a78c30c3 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -857,6 +857,11 @@ struct ARMCPU { + uint32_t id_isar4; + uint32_t id_isar5; + uint32_t id_isar6; ++ uint32_t id_mmfr0; ++ uint32_t id_mmfr1; ++ uint32_t id_mmfr2; ++ uint32_t id_mmfr3; ++ uint32_t id_mmfr4; + uint32_t mvfr0; + uint32_t mvfr1; + uint32_t mvfr2; +@@ -882,11 +887,6 @@ struct ARMCPU { + uint64_t pmceid0; + uint64_t pmceid1; + uint32_t id_afr0; +- uint32_t id_mmfr0; +- uint32_t id_mmfr1; +- uint32_t id_mmfr2; +- uint32_t id_mmfr3; +- uint32_t id_mmfr4; + uint64_t id_aa64afr0; + uint64_t id_aa64afr1; + uint32_t clidr; +@@ -3490,12 +3490,12 @@ static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) + + static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) != 0; ++ return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; + } + + static inline bool 
isar_feature_aa32_ats1e1(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr0, ID_MMFR3, PAN) >= 2; ++ return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; + } + + static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index a0d07fd7..d450b8c8 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -125,10 +125,10 @@ static void aarch64_a57_initfn(Object *obj) + cpu->id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; +- cpu->id_mmfr0 = 0x10101105; +- cpu->id_mmfr1 = 0x40000000; +- cpu->id_mmfr2 = 0x01260000; +- cpu->id_mmfr3 = 0x02102211; ++ cpu->isar.id_mmfr0 = 0x10101105; ++ cpu->isar.id_mmfr1 = 0x40000000; ++ cpu->isar.id_mmfr2 = 0x01260000; ++ cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; +@@ -179,10 +179,10 @@ static void aarch64_a53_initfn(Object *obj) + cpu->id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; +- cpu->id_mmfr0 = 0x10101105; +- cpu->id_mmfr1 = 0x40000000; +- cpu->id_mmfr2 = 0x01260000; +- cpu->id_mmfr3 = 0x02102211; ++ cpu->isar.id_mmfr0 = 0x10101105; ++ cpu->isar.id_mmfr1 = 0x40000000; ++ cpu->isar.id_mmfr2 = 0x01260000; ++ cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; +@@ -233,10 +233,10 @@ static void aarch64_a72_initfn(Object *obj) + cpu->id_pfr1 = 0x00011011; + cpu->isar.id_dfr0 = 0x03010066; + cpu->id_afr0 = 0x00000000; +- cpu->id_mmfr0 = 0x10201105; +- cpu->id_mmfr1 = 0x40000000; +- cpu->id_mmfr2 = 0x01260000; +- cpu->id_mmfr3 = 0x02102211; ++ cpu->isar.id_mmfr0 = 0x10201105; ++ cpu->isar.id_mmfr1 = 0x40000000; ++ cpu->isar.id_mmfr2 = 0x01260000; ++ cpu->isar.id_mmfr3 = 0x02102211; + cpu->isar.id_isar0 = 0x02101110; + cpu->isar.id_isar1 = 0x13112111; + cpu->isar.id_isar2 = 0x21232042; +@@ -383,9 +383,9 
@@ static void aarch64_max_initfn(Object *obj) + u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); + cpu->isar.id_isar6 = u; + +- u = cpu->id_mmfr3; ++ u = cpu->isar.id_mmfr3; + u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */ +- cpu->id_mmfr3 = u; ++ cpu->isar.id_mmfr3 = u; + + /* + * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 60ff7c0f..49cd7a7e 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -5906,19 +5906,19 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->id_mmfr0 }, ++ .resetvalue = cpu->isar.id_mmfr0 }, + { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->id_mmfr1 }, ++ .resetvalue = cpu->isar.id_mmfr1 }, + { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->id_mmfr2 }, ++ .resetvalue = cpu->isar.id_mmfr2 }, + { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->id_mmfr3 }, ++ .resetvalue = cpu->isar.id_mmfr3 }, + { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, +@@ -5946,7 +5946,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->id_mmfr4 }, ++ .resetvalue = cpu->isar.id_mmfr4 }, + { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, + .access = 
PL1_R, .type = ARM_CP_CONST, +@@ -6426,7 +6426,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) + define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo); + define_arm_cp_regs(cpu, vmsa_cp_reginfo); + /* TTCBR2 is introduced with ARMv8.2-A32HPD. */ +- if (FIELD_EX32(cpu->id_mmfr4, ID_MMFR4, HPDS) != 0) { ++ if (FIELD_EX32(cpu->isar.id_mmfr4, ID_MMFR4, HPDS) != 0) { + define_one_arm_cp_reg(cpu, &ttbcr2_reginfo); + } + } +diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c +index ee158830..2247148e 100644 +--- a/target/arm/kvm32.c ++++ b/target/arm/kvm32.c +@@ -104,6 +104,23 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + * Fortunately there is not yet anything in there that affects migration. + */ + ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, ++ ARM_CP15_REG32(0, 0, 1, 4)); ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, ++ ARM_CP15_REG32(0, 0, 1, 5)); ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, ++ ARM_CP15_REG32(0, 0, 1, 6)); ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, ++ ARM_CP15_REG32(0, 0, 1, 7)); ++ if (read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, ++ ARM_CP15_REG32(0, 0, 2, 6))) { ++ /* ++ * Older kernels don't support reading ID_MMFR4 (a new in v8 ++ * register); assume it's zero. ++ */ ++ ahcf->isar.id_mmfr4 = 0; ++ } ++ + kvm_arm_destroy_scratch_host_vcpu(fdarray); + + if (err < 0) { +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index b794108a..276d1466 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -551,6 +551,14 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + * than skipping the reads and leaving 0, as we must avoid + * considering the values in every case. 
+ */ ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, ++ ARM64_SYS_REG(3, 0, 0, 1, 4)); ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, ++ ARM64_SYS_REG(3, 0, 0, 1, 5)); ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, ++ ARM64_SYS_REG(3, 0, 0, 1, 6)); ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, ++ ARM64_SYS_REG(3, 0, 0, 1, 7)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, + ARM64_SYS_REG(3, 0, 0, 2, 0)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, +@@ -563,6 +571,8 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + ARM64_SYS_REG(3, 0, 0, 2, 4)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, + ARM64_SYS_REG(3, 0, 0, 2, 5)); ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, ++ ARM64_SYS_REG(3, 0, 0, 2, 6)); + err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, + ARM64_SYS_REG(3, 0, 0, 2, 7)); + +-- +2.25.1 + diff --git a/target-arm-Update-ID-fields.patch b/target-arm-Update-ID-fields.patch new file mode 100644 index 0000000000000000000000000000000000000000..94ed8027c9e238f384e767bc88c209749eee234e --- /dev/null +++ b/target-arm-Update-ID-fields.patch @@ -0,0 +1,84 @@ +From 47c76d73a435884b66ce6417cb853893099be5eb Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Tue, 11 Aug 2020 10:18:57 +0800 +Subject: [PATCH 8/9] target/arm: Update ID fields + +Update definitions for ID fields, up to ARMv8.6. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + target/arm/cpu.h | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 068c3fa2..eb875e11 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1691,6 +1691,8 @@ FIELD(ID_ISAR6, DP, 4, 4) + FIELD(ID_ISAR6, FHM, 8, 4) + FIELD(ID_ISAR6, SB, 12, 4) + FIELD(ID_ISAR6, SPECRES, 16, 4) ++FIELD(ID_ISAR6, BF16, 20, 4) ++FIELD(ID_ISAR6, I8MM, 24, 4) + + FIELD(ID_MMFR3, CMAINTVA, 0, 4) + FIELD(ID_MMFR3, CMAINTSW, 4, 4) +@@ -1736,6 +1738,9 @@ FIELD(ID_AA64ISAR1, GPI, 28, 4) + FIELD(ID_AA64ISAR1, FRINTTS, 32, 4) + FIELD(ID_AA64ISAR1, SB, 36, 4) + FIELD(ID_AA64ISAR1, SPECRES, 40, 4) ++FIELD(ID_AA64ISAR1, BF16, 44, 4) ++FIELD(ID_AA64ISAR1, DGH, 48, 4) ++FIELD(ID_AA64ISAR1, I8MM, 52, 4) + + FIELD(ID_AA64PFR0, EL0, 0, 4) + FIELD(ID_AA64PFR0, EL1, 4, 4) +@@ -1746,11 +1751,18 @@ FIELD(ID_AA64PFR0, ADVSIMD, 20, 4) + FIELD(ID_AA64PFR0, GIC, 24, 4) + FIELD(ID_AA64PFR0, RAS, 28, 4) + FIELD(ID_AA64PFR0, SVE, 32, 4) ++FIELD(ID_AA64PFR0, SEL2, 36, 4) ++FIELD(ID_AA64PFR0, MPAM, 40, 4) ++FIELD(ID_AA64PFR0, AMU, 44, 4) ++FIELD(ID_AA64PFR0, DIT, 48, 4) ++FIELD(ID_AA64PFR0, CSV2, 56, 4) ++FIELD(ID_AA64PFR0, CSV3, 60, 4) + + FIELD(ID_AA64PFR1, BT, 0, 4) + FIELD(ID_AA64PFR1, SBSS, 4, 4) + FIELD(ID_AA64PFR1, MTE, 8, 4) + FIELD(ID_AA64PFR1, RAS_FRAC, 12, 4) ++FIELD(ID_AA64PFR1, MPAM_FRAC, 16, 4) + + FIELD(ID_AA64MMFR0, PARANGE, 0, 4) + FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4) +@@ -1764,6 +1776,8 @@ FIELD(ID_AA64MMFR0, TGRAN16_2, 32, 4) + FIELD(ID_AA64MMFR0, TGRAN64_2, 36, 4) + FIELD(ID_AA64MMFR0, TGRAN4_2, 40, 4) + FIELD(ID_AA64MMFR0, EXS, 44, 4) ++FIELD(ID_AA64MMFR0, FGT, 56, 4) ++FIELD(ID_AA64MMFR0, ECV, 60, 4) + + FIELD(ID_AA64MMFR1, HAFDBS, 0, 4) + FIELD(ID_AA64MMFR1, VMIDBITS, 4, 4) +@@ -1773,6 +1787,8 @@ FIELD(ID_AA64MMFR1, LO, 16, 4) + FIELD(ID_AA64MMFR1, PAN, 20, 4) + FIELD(ID_AA64MMFR1, SPECSEI, 24, 4) + FIELD(ID_AA64MMFR1, XNX, 28, 4) ++FIELD(ID_AA64MMFR1, TWED,
32, 4) ++FIELD(ID_AA64MMFR1, ETS, 36, 4) + + FIELD(ID_AA64MMFR2, CNP, 0, 4) + FIELD(ID_AA64MMFR2, UAO, 4, 4) +@@ -1799,6 +1815,7 @@ FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) + FIELD(ID_AA64DFR0, PMSVER, 32, 4) + FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) + FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) ++FIELD(ID_AA64DFR0, MUPMU, 48, 4) + + FIELD(ID_DFR0, COPDBG, 0, 4) + FIELD(ID_DFR0, COPSDBG, 4, 4) +-- +2.25.1 + diff --git a/target-arm-Update-the-ID-registers-of-Kunpeng-920.patch b/target-arm-Update-the-ID-registers-of-Kunpeng-920.patch new file mode 100644 index 0000000000000000000000000000000000000000..586dcbb1998a3e0e910feec54d326f577154711e --- /dev/null +++ b/target-arm-Update-the-ID-registers-of-Kunpeng-920.patch @@ -0,0 +1,57 @@ +From b54ca94f19a9b22537712638ae05d2095258eb80 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Sat, 19 Sep 2020 09:04:45 +0800 +Subject: [PATCH] target/arm: Update the ID registers of Kunpeng-920 + +The values of some ID registers in Kunpeng-920 are not exactly correct. +Let's update them. The values are read from Kunpeng-920 by calling +read_sysreg_s. 
+ +Signed-off-by: Peng Liang +--- + target/arm/cpu64.c | 27 +++++++++++++++++++++++++-- + 1 file changed, 25 insertions(+), 2 deletions(-) + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 726d123d8e..a1649f8844 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -275,10 +275,33 @@ static void aarch64_kunpeng_920_initfn(Object *obj) + + cpu->midr = 0x480fd010; + cpu->ctr = 0x84448004; +- cpu->isar.regs[ID_AA64PFR0] = 0x11001111; ++ cpu->isar.regs[ID_ISAR0] = 0; ++ cpu->isar.regs[ID_ISAR1] = 0; ++ cpu->isar.regs[ID_ISAR2] = 0; ++ cpu->isar.regs[ID_ISAR3] = 0; ++ cpu->isar.regs[ID_ISAR4] = 0; ++ cpu->isar.regs[ID_ISAR5] = 0; ++ cpu->isar.regs[ID_MMFR0] = 0; ++ cpu->isar.regs[ID_MMFR1] = 0; ++ cpu->isar.regs[ID_MMFR2] = 0; ++ cpu->isar.regs[ID_MMFR3] = 0; ++ cpu->isar.regs[ID_MMFR4] = 0; ++ cpu->isar.regs[MVFR0] = 0; ++ cpu->isar.regs[MVFR1] = 0; ++ cpu->isar.regs[MVFR2] = 0; ++ cpu->isar.regs[ID_DFR0] = 0; ++ cpu->isar.regs[MVFR2] = 0; ++ cpu->isar.regs[MVFR2] = 0; ++ cpu->isar.regs[MVFR2] = 0; ++ cpu->id_pfr0 = 0; ++ cpu->id_pfr1 = 0; ++ cpu->isar.regs[ID_AA64PFR0] = 0x0000010011111111; + cpu->isar.regs[ID_AA64DFR0] = 0x110305408; +- cpu->isar.regs[ID_AA64ISAR0] = 0x10211120; ++ cpu->isar.regs[ID_AA64ISAR0] = 0x0001100010211120; ++ cpu->isar.regs[ID_AA64ISAR1] = 0x00011001; + cpu->isar.regs[ID_AA64MMFR0] = 0x101125; ++ cpu->isar.regs[ID_AA64MMFR1] = 0x10211122; ++ cpu->isar.regs[ID_AA64MMFR2] = 0x00001011; + } + + static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, +-- +2.23.0 + diff --git a/target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch b/target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e32f85104cb492dba2d0e72aa6138342ef960db --- /dev/null +++ b/target-arm-Use-FIELD-macros-for-clearing-ID_DFR0-PER.patch @@ -0,0 +1,36 @@ +From f54cdca97bf86f5ca1df8471bc229b89797b287e Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 14 Feb 
2020 17:51:02 +0000 +Subject: [PATCH 04/13] target/arm: Use FIELD macros for clearing ID_DFR0 + PERFMON field +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We already define FIELD macros for ID_DFR0, so use them in the +one place where we're doing direct bit value manipulation. + +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Signed-off-by: Peter Maydell +Message-id: 20200214175116.9164-8-peter.maydell@linaro.org +--- + target/arm/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index dbd05e01..6ad211b1 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1523,7 +1523,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + #endif + } else { + cpu->id_aa64dfr0 = FIELD_DP64(cpu->id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); +- cpu->id_dfr0 &= ~(0xf << 24); ++ cpu->id_dfr0 = FIELD_DP32(cpu->id_dfr0, ID_DFR0, PERFMON, 0); + cpu->pmceid0 = 0; + cpu->pmceid1 = 0; + } +-- +2.25.1 + diff --git a/target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch b/target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch new file mode 100644 index 0000000000000000000000000000000000000000..455dc843c105743750f7bc573b6fb86f3a5861b8 --- /dev/null +++ b/target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch @@ -0,0 +1,42 @@ +From ad6ce039cab07b6a99ccaa36fbb0043ae85a74c9 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Mon, 21 Sep 2020 22:14:20 +0800 +Subject: [PATCH] target/arm: clear EL2 and EL3 only when kvm is not enabled + +When has_el2 and has_el3 are disabled, which is the default for the +virt machine, QEMU clears the corresponding fields in ID_PFR1_EL1 and +ID_AA64PFR0_EL1 so that EL3 and EL2 are not exposed to the guest. KVM +previously did not emulate AArch64 ID registers, so this clearing had no +effect under KVM. Hence, clear EL2 and EL3 only when KVM is not enabled, for +backwards compatibility.
+ +Signed-off-by: Peng Liang +--- + target/arm/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 7ae2d3da56..3f62336acf 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1996,7 +1996,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + } + } + +- if (!cpu->has_el3) { ++ if (!cpu->has_el3 && !kvm_enabled()) { + /* If the has_el3 CPU property is disabled then we need to disable the + * feature. + */ +@@ -2037,7 +2037,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + cpu->pmceid1 = 0; + } + +- if (!arm_feature(env, ARM_FEATURE_EL2)) { ++ if (!arm_feature(env, ARM_FEATURE_EL2) && !kvm_enabled()) { + /* Disable the hypervisor feature bits in the processor feature + * registers if we don't have EL2. These are id_pfr1[15:12] and + * id_aa64pfr0_el1[11:8]. +-- +2.23.0 + diff --git a/target-arm-convert-isar-regs-to-array.patch b/target-arm-convert-isar-regs-to-array.patch new file mode 100644 index 0000000000000000000000000000000000000000..528371212aad42f034db62858b1a2da2cdcba79d --- /dev/null +++ b/target-arm-convert-isar-regs-to-array.patch @@ -0,0 +1,1908 @@ +From ac92f0f7bbf7cf063ba45fbfaf7e7970dd76544a Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Thu, 6 Aug 2020 16:14:25 +0800 +Subject: [PATCH 1/9] target/arm: convert isar regs to array + +The isar in ARMCPU is a struct, each field of which represents an ID +register. It's not convenient for us to support CPU feature in AArch64. +So let's change it to an array first and add an enum as the index of the +array for convenience. Since we will never access high 32-bits of ID +registers in AArch32, it's harmless to change the ID registers in +AArch32 to 64-bits. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + hw/intc/armv7m_nvic.c | 28 +-- + target/arm/cpu.c | 440 +++++++++++++++++++++-------------------- + target/arm/cpu.h | 178 +++++++++-------- + target/arm/cpu64.c | 158 +++++++-------- + target/arm/helper.c | 54 ++--- + target/arm/internals.h | 15 +- + target/arm/kvm64.c | 68 +++---- + 7 files changed, 478 insertions(+), 463 deletions(-) + +diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c +index f7ef6ad1..5013ec97 100644 +--- a/hw/intc/armv7m_nvic.c ++++ b/hw/intc/armv7m_nvic.c +@@ -1223,29 +1223,29 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) + case 0xd44: /* PFR1. */ + return cpu->id_pfr1; + case 0xd48: /* DFR0. */ +- return cpu->isar.id_dfr0; ++ return cpu->isar.regs[ID_DFR0]; + case 0xd4c: /* AFR0. */ + return cpu->id_afr0; + case 0xd50: /* MMFR0. */ +- return cpu->isar.id_mmfr0; ++ return cpu->isar.regs[ID_MMFR0]; + case 0xd54: /* MMFR1. */ +- return cpu->isar.id_mmfr1; ++ return cpu->isar.regs[ID_MMFR1]; + case 0xd58: /* MMFR2. */ +- return cpu->isar.id_mmfr2; ++ return cpu->isar.regs[ID_MMFR2]; + case 0xd5c: /* MMFR3. */ +- return cpu->isar.id_mmfr3; ++ return cpu->isar.regs[ID_MMFR3]; + case 0xd60: /* ISAR0. */ +- return cpu->isar.id_isar0; ++ return cpu->isar.regs[ID_ISAR0]; + case 0xd64: /* ISAR1. */ +- return cpu->isar.id_isar1; ++ return cpu->isar.regs[ID_ISAR1]; + case 0xd68: /* ISAR2. */ +- return cpu->isar.id_isar2; ++ return cpu->isar.regs[ID_ISAR2]; + case 0xd6c: /* ISAR3. */ +- return cpu->isar.id_isar3; ++ return cpu->isar.regs[ID_ISAR3]; + case 0xd70: /* ISAR4. */ +- return cpu->isar.id_isar4; ++ return cpu->isar.regs[ID_ISAR4]; + case 0xd74: /* ISAR5. 
*/ +- return cpu->isar.id_isar5; ++ return cpu->isar.regs[ID_ISAR5]; + case 0xd78: /* CLIDR */ + return cpu->clidr; + case 0xd7c: /* CTR */ +@@ -1450,11 +1450,11 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs) + } + return cpu->env.v7m.fpdscr[attrs.secure]; + case 0xf40: /* MVFR0 */ +- return cpu->isar.mvfr0; ++ return cpu->isar.regs[MVFR0]; + case 0xf44: /* MVFR1 */ +- return cpu->isar.mvfr1; ++ return cpu->isar.regs[MVFR1]; + case 0xf48: /* MVFR2 */ +- return cpu->isar.mvfr2; ++ return cpu->isar.regs[MVFR2]; + default: + bad_offset: + qemu_log_mask(LOG_GUEST_ERROR, "NVIC: Bad read offset 0x%x\n", offset); +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index c3728e3d..5bcdad0c 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -170,9 +170,9 @@ static void arm_cpu_reset(CPUState *s) + g_hash_table_foreach(cpu->cp_regs, cp_reg_check_reset, cpu); + + env->vfp.xregs[ARM_VFP_FPSID] = cpu->reset_fpsid; +- env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.mvfr0; +- env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.mvfr1; +- env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.mvfr2; ++ env->vfp.xregs[ARM_VFP_MVFR0] = cpu->isar.regs[MVFR0]; ++ env->vfp.xregs[ARM_VFP_MVFR1] = cpu->isar.regs[MVFR1]; ++ env->vfp.xregs[ARM_VFP_MVFR2] = cpu->isar.regs[MVFR2]; + + cpu->power_state = cpu->start_powered_off ? 
PSCI_OFF : PSCI_ON; + s->halted = cpu->start_powered_off; +@@ -1251,19 +1251,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + unset_feature(env, ARM_FEATURE_VFP3); + unset_feature(env, ARM_FEATURE_VFP4); + +- t = cpu->isar.id_aa64isar1; ++ t = cpu->isar.regs[ID_AA64ISAR1]; + t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 0); +- cpu->isar.id_aa64isar1 = t; ++ cpu->isar.regs[ID_AA64ISAR1] = t; + +- t = cpu->isar.id_aa64pfr0; ++ t = cpu->isar.regs[ID_AA64PFR0]; + t = FIELD_DP64(t, ID_AA64PFR0, FP, 0xf); +- cpu->isar.id_aa64pfr0 = t; ++ cpu->isar.regs[ID_AA64PFR0] = t; + +- u = cpu->isar.id_isar6; ++ u = cpu->isar.regs[ID_ISAR6]; + u = FIELD_DP32(u, ID_ISAR6, JSCVT, 0); +- cpu->isar.id_isar6 = u; ++ cpu->isar.regs[ID_ISAR6] = u; + +- u = cpu->isar.mvfr0; ++ u = cpu->isar.regs[MVFR0]; + u = FIELD_DP32(u, MVFR0, FPSP, 0); + u = FIELD_DP32(u, MVFR0, FPDP, 0); + u = FIELD_DP32(u, MVFR0, FPTRAP, 0); +@@ -1271,17 +1271,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + u = FIELD_DP32(u, MVFR0, FPSQRT, 0); + u = FIELD_DP32(u, MVFR0, FPSHVEC, 0); + u = FIELD_DP32(u, MVFR0, FPROUND, 0); +- cpu->isar.mvfr0 = u; ++ cpu->isar.regs[MVFR0] = u; + +- u = cpu->isar.mvfr1; ++ u = cpu->isar.regs[MVFR1]; + u = FIELD_DP32(u, MVFR1, FPFTZ, 0); + u = FIELD_DP32(u, MVFR1, FPDNAN, 0); + u = FIELD_DP32(u, MVFR1, FPHP, 0); +- cpu->isar.mvfr1 = u; ++ cpu->isar.regs[MVFR1] = u; + +- u = cpu->isar.mvfr2; ++ u = cpu->isar.regs[MVFR2]; + u = FIELD_DP32(u, MVFR2, FPMISC, 0); +- cpu->isar.mvfr2 = u; ++ cpu->isar.regs[MVFR2] = u; + } + + if (!cpu->has_neon) { +@@ -1290,56 +1290,56 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + + unset_feature(env, ARM_FEATURE_NEON); + +- t = cpu->isar.id_aa64isar0; ++ t = cpu->isar.regs[ID_AA64ISAR0]; + t = FIELD_DP64(t, ID_AA64ISAR0, DP, 0); +- cpu->isar.id_aa64isar0 = t; ++ cpu->isar.regs[ID_AA64ISAR0] = t; + +- t = cpu->isar.id_aa64isar1; ++ t = cpu->isar.regs[ID_AA64ISAR1]; + t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 
0); +- cpu->isar.id_aa64isar1 = t; ++ cpu->isar.regs[ID_AA64ISAR1] = t; + +- t = cpu->isar.id_aa64pfr0; ++ t = cpu->isar.regs[ID_AA64PFR0]; + t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 0xf); +- cpu->isar.id_aa64pfr0 = t; ++ cpu->isar.regs[ID_AA64PFR0] = t; + +- u = cpu->isar.id_isar5; ++ u = cpu->isar.regs[ID_ISAR5]; + u = FIELD_DP32(u, ID_ISAR5, RDM, 0); + u = FIELD_DP32(u, ID_ISAR5, VCMA, 0); +- cpu->isar.id_isar5 = u; ++ cpu->isar.regs[ID_ISAR5] = u; + +- u = cpu->isar.id_isar6; ++ u = cpu->isar.regs[ID_ISAR6]; + u = FIELD_DP32(u, ID_ISAR6, DP, 0); + u = FIELD_DP32(u, ID_ISAR6, FHM, 0); +- cpu->isar.id_isar6 = u; ++ cpu->isar.regs[ID_ISAR6] = u; + +- u = cpu->isar.mvfr1; ++ u = cpu->isar.regs[MVFR1]; + u = FIELD_DP32(u, MVFR1, SIMDLS, 0); + u = FIELD_DP32(u, MVFR1, SIMDINT, 0); + u = FIELD_DP32(u, MVFR1, SIMDSP, 0); + u = FIELD_DP32(u, MVFR1, SIMDHP, 0); + u = FIELD_DP32(u, MVFR1, SIMDFMAC, 0); +- cpu->isar.mvfr1 = u; ++ cpu->isar.regs[MVFR1] = u; + +- u = cpu->isar.mvfr2; ++ u = cpu->isar.regs[MVFR2]; + u = FIELD_DP32(u, MVFR2, SIMDMISC, 0); +- cpu->isar.mvfr2 = u; ++ cpu->isar.regs[MVFR2] = u; + } + + if (!cpu->has_neon && !cpu->has_vfp) { + uint64_t t; + uint32_t u; + +- t = cpu->isar.id_aa64isar0; ++ t = cpu->isar.regs[ID_AA64ISAR0]; + t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 0); +- cpu->isar.id_aa64isar0 = t; ++ cpu->isar.regs[ID_AA64ISAR0] = t; + +- t = cpu->isar.id_aa64isar1; ++ t = cpu->isar.regs[ID_AA64ISAR1]; + t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 0); +- cpu->isar.id_aa64isar1 = t; ++ cpu->isar.regs[ID_AA64ISAR1] = t; + +- u = cpu->isar.mvfr0; ++ u = cpu->isar.regs[MVFR0]; + u = FIELD_DP32(u, MVFR0, SIMDREG, 0); +- cpu->isar.mvfr0 = u; ++ cpu->isar.regs[MVFR0] = u; + } + + if (arm_feature(env, ARM_FEATURE_M) && !cpu->has_dsp) { +@@ -1347,19 +1347,19 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + + unset_feature(env, ARM_FEATURE_THUMB_DSP); + +- u = cpu->isar.id_isar1; ++ u = cpu->isar.regs[ID_ISAR1]; + u = FIELD_DP32(u, ID_ISAR1, 
EXTEND, 1); +- cpu->isar.id_isar1 = u; ++ cpu->isar.regs[ID_ISAR1] = u; + +- u = cpu->isar.id_isar2; ++ u = cpu->isar.regs[ID_ISAR2]; + u = FIELD_DP32(u, ID_ISAR2, MULTU, 1); + u = FIELD_DP32(u, ID_ISAR2, MULTS, 1); +- cpu->isar.id_isar2 = u; ++ cpu->isar.regs[ID_ISAR2] = u; + +- u = cpu->isar.id_isar3; ++ u = cpu->isar.regs[ID_ISAR3]; + u = FIELD_DP32(u, ID_ISAR3, SIMD, 1); + u = FIELD_DP32(u, ID_ISAR3, SATURATE, 0); +- cpu->isar.id_isar3 = u; ++ cpu->isar.regs[ID_ISAR3] = u; + } + + /* Some features automatically imply others: */ +@@ -1499,7 +1499,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12]. + */ + cpu->id_pfr1 &= ~0xf0; +- cpu->isar.id_aa64pfr0 &= ~0xf000; ++ cpu->isar.regs[ID_AA64PFR0] &= ~0xf000; + } + + if (!cpu->has_el2) { +@@ -1522,9 +1522,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + cpu); + #endif + } else { +- cpu->isar.id_aa64dfr0 = +- FIELD_DP64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, PMUVER, 0); +- cpu->isar.id_dfr0 = FIELD_DP32(cpu->isar.id_dfr0, ID_DFR0, PERFMON, 0); ++ cpu->isar.regs[ID_AA64DFR0] = ++ FIELD_DP64(cpu->isar.regs[ID_AA64DFR0], ID_AA64DFR0, PMUVER, 0); ++ cpu->isar.regs[ID_DFR0] = FIELD_DP32(cpu->isar.regs[ID_DFR0], ID_DFR0, ++ PERFMON, 0); + cpu->pmceid0 = 0; + cpu->pmceid1 = 0; + } +@@ -1534,7 +1535,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + * registers if we don't have EL2. These are id_pfr1[15:12] and + * id_aa64pfr0_el1[11:8]. + */ +- cpu->isar.id_aa64pfr0 &= ~0xf00; ++ cpu->isar.regs[ID_AA64PFR0] &= ~0xf00; + cpu->id_pfr1 &= ~0xf000; + } + +@@ -1675,13 +1676,15 @@ static void arm926_initfn(Object *obj) + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. 
+ */ +- cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); ++ cpu->isar.regs[ID_ISAR1] = FIELD_DP32(cpu->isar.regs[ID_ISAR1], ID_ISAR1, ++ JAZELLE, 1); + /* + * Similarly, we need to set MVFR0 fields to enable double precision + * and short vector support even though ARMv5 doesn't have this register. + */ +- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); +- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); ++ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, ++ FPSHVEC, 1); ++ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, FPDP, 1); + } + + static void arm946_initfn(Object *obj) +@@ -1717,13 +1720,15 @@ static void arm1026_initfn(Object *obj) + * ARMv5 does not have the ID_ISAR registers, but we can still + * set the field to indicate Jazelle support within QEMU. + */ +- cpu->isar.id_isar1 = FIELD_DP32(cpu->isar.id_isar1, ID_ISAR1, JAZELLE, 1); ++ cpu->isar.regs[ID_ISAR1] = FIELD_DP32(cpu->isar.regs[ID_ISAR1], ID_ISAR1, ++ JAZELLE, 1); + /* + * Similarly, we need to set MVFR0 fields to enable double precision + * and short vector support even though ARMv5 doesn't have this register. 
+ */ +- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); +- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPDP, 1); ++ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, ++ FPSHVEC, 1); ++ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, FPDP, 1); + + { + /* The 1026 had an IFAR at c6,c0,0,1 rather than the ARMv6 c6,c0,0,2 */ +@@ -1756,22 +1761,22 @@ static void arm1136_r2_initfn(Object *obj) + set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); + cpu->midr = 0x4107b362; + cpu->reset_fpsid = 0x410120b4; +- cpu->isar.mvfr0 = 0x11111111; +- cpu->isar.mvfr1 = 0x00000000; ++ cpu->isar.regs[MVFR0] = 0x11111111; ++ cpu->isar.regs[MVFR1] = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; +- cpu->isar.id_dfr0 = 0x2; ++ cpu->isar.regs[ID_DFR0] = 0x2; + cpu->id_afr0 = 0x3; +- cpu->isar.id_mmfr0 = 0x01130003; +- cpu->isar.id_mmfr1 = 0x10030302; +- cpu->isar.id_mmfr2 = 0x01222110; +- cpu->isar.id_isar0 = 0x00140011; +- cpu->isar.id_isar1 = 0x12002111; +- cpu->isar.id_isar2 = 0x11231111; +- cpu->isar.id_isar3 = 0x01102131; +- cpu->isar.id_isar4 = 0x141; ++ cpu->isar.regs[ID_MMFR0] = 0x01130003; ++ cpu->isar.regs[ID_MMFR1] = 0x10030302; ++ cpu->isar.regs[ID_MMFR2] = 0x01222110; ++ cpu->isar.regs[ID_ISAR0] = 0x00140011; ++ cpu->isar.regs[ID_ISAR1] = 0x12002111; ++ cpu->isar.regs[ID_ISAR2] = 0x11231111; ++ cpu->isar.regs[ID_ISAR3] = 0x01102131; ++ cpu->isar.regs[ID_ISAR4] = 0x141; + cpu->reset_auxcr = 7; + } + +@@ -1788,22 +1793,22 @@ static void arm1136_initfn(Object *obj) + set_feature(&cpu->env, ARM_FEATURE_CACHE_BLOCK_OPS); + cpu->midr = 0x4117b363; + cpu->reset_fpsid = 0x410120b4; +- cpu->isar.mvfr0 = 0x11111111; +- cpu->isar.mvfr1 = 0x00000000; ++ cpu->isar.regs[MVFR0] = 0x11111111; ++ cpu->isar.regs[MVFR1] = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; +- cpu->isar.id_dfr0 = 0x2; ++ 
cpu->isar.regs[ID_DFR0] = 0x2; + cpu->id_afr0 = 0x3; +- cpu->isar.id_mmfr0 = 0x01130003; +- cpu->isar.id_mmfr1 = 0x10030302; +- cpu->isar.id_mmfr2 = 0x01222110; +- cpu->isar.id_isar0 = 0x00140011; +- cpu->isar.id_isar1 = 0x12002111; +- cpu->isar.id_isar2 = 0x11231111; +- cpu->isar.id_isar3 = 0x01102131; +- cpu->isar.id_isar4 = 0x141; ++ cpu->isar.regs[ID_MMFR0] = 0x01130003; ++ cpu->isar.regs[ID_MMFR1] = 0x10030302; ++ cpu->isar.regs[ID_MMFR2] = 0x01222110; ++ cpu->isar.regs[ID_ISAR0] = 0x00140011; ++ cpu->isar.regs[ID_ISAR1] = 0x12002111; ++ cpu->isar.regs[ID_ISAR2] = 0x11231111; ++ cpu->isar.regs[ID_ISAR3] = 0x01102131; ++ cpu->isar.regs[ID_ISAR4] = 0x141; + cpu->reset_auxcr = 7; + } + +@@ -1821,22 +1826,22 @@ static void arm1176_initfn(Object *obj) + set_feature(&cpu->env, ARM_FEATURE_EL3); + cpu->midr = 0x410fb767; + cpu->reset_fpsid = 0x410120b5; +- cpu->isar.mvfr0 = 0x11111111; +- cpu->isar.mvfr1 = 0x00000000; ++ cpu->isar.regs[MVFR0] = 0x11111111; ++ cpu->isar.regs[MVFR1] = 0x00000000; + cpu->ctr = 0x1dd20d2; + cpu->reset_sctlr = 0x00050078; + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x11; +- cpu->isar.id_dfr0 = 0x33; ++ cpu->isar.regs[ID_DFR0] = 0x33; + cpu->id_afr0 = 0; +- cpu->isar.id_mmfr0 = 0x01130003; +- cpu->isar.id_mmfr1 = 0x10030302; +- cpu->isar.id_mmfr2 = 0x01222100; +- cpu->isar.id_isar0 = 0x0140011; +- cpu->isar.id_isar1 = 0x12002111; +- cpu->isar.id_isar2 = 0x11231121; +- cpu->isar.id_isar3 = 0x01102131; +- cpu->isar.id_isar4 = 0x01141; ++ cpu->isar.regs[ID_MMFR0] = 0x01130003; ++ cpu->isar.regs[ID_MMFR1] = 0x10030302; ++ cpu->isar.regs[ID_MMFR2] = 0x01222100; ++ cpu->isar.regs[ID_ISAR0] = 0x0140011; ++ cpu->isar.regs[ID_ISAR1] = 0x12002111; ++ cpu->isar.regs[ID_ISAR2] = 0x11231121; ++ cpu->isar.regs[ID_ISAR3] = 0x01102131; ++ cpu->isar.regs[ID_ISAR4] = 0x01141; + cpu->reset_auxcr = 7; + } + +@@ -1852,21 +1857,21 @@ static void arm11mpcore_initfn(Object *obj) + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + cpu->midr = 0x410fb022; + 
cpu->reset_fpsid = 0x410120b4; +- cpu->isar.mvfr0 = 0x11111111; +- cpu->isar.mvfr1 = 0x00000000; ++ cpu->isar.regs[MVFR0] = 0x11111111; ++ cpu->isar.regs[MVFR1] = 0x00000000; + cpu->ctr = 0x1d192992; /* 32K icache 32K dcache */ + cpu->id_pfr0 = 0x111; + cpu->id_pfr1 = 0x1; +- cpu->isar.id_dfr0 = 0; ++ cpu->isar.regs[ID_DFR0] = 0; + cpu->id_afr0 = 0x2; +- cpu->isar.id_mmfr0 = 0x01100103; +- cpu->isar.id_mmfr1 = 0x10020302; +- cpu->isar.id_mmfr2 = 0x01222000; +- cpu->isar.id_isar0 = 0x00100011; +- cpu->isar.id_isar1 = 0x12002111; +- cpu->isar.id_isar2 = 0x11221011; +- cpu->isar.id_isar3 = 0x01102131; +- cpu->isar.id_isar4 = 0x141; ++ cpu->isar.regs[ID_MMFR0] = 0x01100103; ++ cpu->isar.regs[ID_MMFR1] = 0x10020302; ++ cpu->isar.regs[ID_MMFR2] = 0x01222000; ++ cpu->isar.regs[ID_ISAR0] = 0x00100011; ++ cpu->isar.regs[ID_ISAR1] = 0x12002111; ++ cpu->isar.regs[ID_ISAR2] = 0x11221011; ++ cpu->isar.regs[ID_ISAR3] = 0x01102131; ++ cpu->isar.regs[ID_ISAR4] = 0x141; + cpu->reset_auxcr = 1; + } + +@@ -1889,19 +1894,19 @@ static void cortex_m3_initfn(Object *obj) + cpu->pmsav7_dregion = 8; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000200; +- cpu->isar.id_dfr0 = 0x00100000; ++ cpu->isar.regs[ID_DFR0] = 0x00100000; + cpu->id_afr0 = 0x00000000; +- cpu->isar.id_mmfr0 = 0x00000030; +- cpu->isar.id_mmfr1 = 0x00000000; +- cpu->isar.id_mmfr2 = 0x00000000; +- cpu->isar.id_mmfr3 = 0x00000000; +- cpu->isar.id_isar0 = 0x01141110; +- cpu->isar.id_isar1 = 0x02111000; +- cpu->isar.id_isar2 = 0x21112231; +- cpu->isar.id_isar3 = 0x01111110; +- cpu->isar.id_isar4 = 0x01310102; +- cpu->isar.id_isar5 = 0x00000000; +- cpu->isar.id_isar6 = 0x00000000; ++ cpu->isar.regs[ID_MMFR0] = 0x00000030; ++ cpu->isar.regs[ID_MMFR1] = 0x00000000; ++ cpu->isar.regs[ID_MMFR2] = 0x00000000; ++ cpu->isar.regs[ID_MMFR3] = 0x00000000; ++ cpu->isar.regs[ID_ISAR0] = 0x01141110; ++ cpu->isar.regs[ID_ISAR1] = 0x02111000; ++ cpu->isar.regs[ID_ISAR2] = 0x21112231; ++ cpu->isar.regs[ID_ISAR3] = 0x01111110; ++ 
cpu->isar.regs[ID_ISAR4] = 0x01310102; ++ cpu->isar.regs[ID_ISAR5] = 0x00000000; ++ cpu->isar.regs[ID_ISAR6] = 0x00000000; + } + + static void cortex_m4_initfn(Object *obj) +@@ -1915,24 +1920,24 @@ static void cortex_m4_initfn(Object *obj) + set_feature(&cpu->env, ARM_FEATURE_VFP4); + cpu->midr = 0x410fc240; /* r0p0 */ + cpu->pmsav7_dregion = 8; +- cpu->isar.mvfr0 = 0x10110021; +- cpu->isar.mvfr1 = 0x11000011; +- cpu->isar.mvfr2 = 0x00000000; ++ cpu->isar.regs[MVFR0] = 0x10110021; ++ cpu->isar.regs[MVFR1] = 0x11000011; ++ cpu->isar.regs[MVFR2] = 0x00000000; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000200; +- cpu->isar.id_dfr0 = 0x00100000; ++ cpu->isar.regs[ID_DFR0] = 0x00100000; + cpu->id_afr0 = 0x00000000; +- cpu->isar.id_mmfr0 = 0x00000030; +- cpu->isar.id_mmfr1 = 0x00000000; +- cpu->isar.id_mmfr2 = 0x00000000; +- cpu->isar.id_mmfr3 = 0x00000000; +- cpu->isar.id_isar0 = 0x01141110; +- cpu->isar.id_isar1 = 0x02111000; +- cpu->isar.id_isar2 = 0x21112231; +- cpu->isar.id_isar3 = 0x01111110; +- cpu->isar.id_isar4 = 0x01310102; +- cpu->isar.id_isar5 = 0x00000000; +- cpu->isar.id_isar6 = 0x00000000; ++ cpu->isar.regs[ID_MMFR0] = 0x00000030; ++ cpu->isar.regs[ID_MMFR1] = 0x00000000; ++ cpu->isar.regs[ID_MMFR2] = 0x00000000; ++ cpu->isar.regs[ID_MMFR3] = 0x00000000; ++ cpu->isar.regs[ID_ISAR0] = 0x01141110; ++ cpu->isar.regs[ID_ISAR1] = 0x02111000; ++ cpu->isar.regs[ID_ISAR2] = 0x21112231; ++ cpu->isar.regs[ID_ISAR3] = 0x01111110; ++ cpu->isar.regs[ID_ISAR4] = 0x01310102; ++ cpu->isar.regs[ID_ISAR5] = 0x00000000; ++ cpu->isar.regs[ID_ISAR6] = 0x00000000; + } + + static void cortex_m33_initfn(Object *obj) +@@ -1948,24 +1953,24 @@ static void cortex_m33_initfn(Object *obj) + cpu->midr = 0x410fd213; /* r0p3 */ + cpu->pmsav7_dregion = 16; + cpu->sau_sregion = 8; +- cpu->isar.mvfr0 = 0x10110021; +- cpu->isar.mvfr1 = 0x11000011; +- cpu->isar.mvfr2 = 0x00000040; ++ cpu->isar.regs[MVFR0] = 0x10110021; ++ cpu->isar.regs[MVFR1] = 0x11000011; ++ cpu->isar.regs[MVFR2] = 
0x00000040; + cpu->id_pfr0 = 0x00000030; + cpu->id_pfr1 = 0x00000210; +- cpu->isar.id_dfr0 = 0x00200000; ++ cpu->isar.regs[ID_DFR0] = 0x00200000; + cpu->id_afr0 = 0x00000000; +- cpu->isar.id_mmfr0 = 0x00101F40; +- cpu->isar.id_mmfr1 = 0x00000000; +- cpu->isar.id_mmfr2 = 0x01000000; +- cpu->isar.id_mmfr3 = 0x00000000; +- cpu->isar.id_isar0 = 0x01101110; +- cpu->isar.id_isar1 = 0x02212000; +- cpu->isar.id_isar2 = 0x20232232; +- cpu->isar.id_isar3 = 0x01111131; +- cpu->isar.id_isar4 = 0x01310132; +- cpu->isar.id_isar5 = 0x00000000; +- cpu->isar.id_isar6 = 0x00000000; ++ cpu->isar.regs[ID_MMFR0] = 0x00101F40; ++ cpu->isar.regs[ID_MMFR1] = 0x00000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01000000; ++ cpu->isar.regs[ID_MMFR3] = 0x00000000; ++ cpu->isar.regs[ID_ISAR0] = 0x01101110; ++ cpu->isar.regs[ID_ISAR1] = 0x02212000; ++ cpu->isar.regs[ID_ISAR2] = 0x20232232; ++ cpu->isar.regs[ID_ISAR3] = 0x01111131; ++ cpu->isar.regs[ID_ISAR4] = 0x01310132; ++ cpu->isar.regs[ID_ISAR5] = 0x00000000; ++ cpu->isar.regs[ID_ISAR6] = 0x00000000; + cpu->clidr = 0x00000000; + cpu->ctr = 0x8000c000; + } +@@ -2004,19 +2009,19 @@ static void cortex_r5_initfn(Object *obj) + cpu->midr = 0x411fc153; /* r1p3 */ + cpu->id_pfr0 = 0x0131; + cpu->id_pfr1 = 0x001; +- cpu->isar.id_dfr0 = 0x010400; ++ cpu->isar.regs[ID_DFR0] = 0x010400; + cpu->id_afr0 = 0x0; +- cpu->isar.id_mmfr0 = 0x0210030; +- cpu->isar.id_mmfr1 = 0x00000000; +- cpu->isar.id_mmfr2 = 0x01200000; +- cpu->isar.id_mmfr3 = 0x0211; +- cpu->isar.id_isar0 = 0x02101111; +- cpu->isar.id_isar1 = 0x13112111; +- cpu->isar.id_isar2 = 0x21232141; +- cpu->isar.id_isar3 = 0x01112131; +- cpu->isar.id_isar4 = 0x0010142; +- cpu->isar.id_isar5 = 0x0; +- cpu->isar.id_isar6 = 0x0; ++ cpu->isar.regs[ID_MMFR0] = 0x0210030; ++ cpu->isar.regs[ID_MMFR1] = 0x00000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01200000; ++ cpu->isar.regs[ID_MMFR3] = 0x0211; ++ cpu->isar.regs[ID_ISAR0] = 0x02101111; ++ cpu->isar.regs[ID_ISAR1] = 0x13112111; ++ cpu->isar.regs[ID_ISAR2] = 
0x21232141; ++ cpu->isar.regs[ID_ISAR3] = 0x01112131; ++ cpu->isar.regs[ID_ISAR4] = 0x0010142; ++ cpu->isar.regs[ID_ISAR5] = 0x0; ++ cpu->isar.regs[ID_ISAR6] = 0x0; + cpu->mp_is_up = true; + cpu->pmsav7_dregion = 16; + define_arm_cp_regs(cpu, cortexr5_cp_reginfo); +@@ -2028,8 +2033,8 @@ static void cortex_r5f_initfn(Object *obj) + + cortex_r5_initfn(obj); + set_feature(&cpu->env, ARM_FEATURE_VFP3); +- cpu->isar.mvfr0 = 0x10110221; +- cpu->isar.mvfr1 = 0x00000011; ++ cpu->isar.regs[MVFR0] = 0x10110221; ++ cpu->isar.regs[MVFR1] = 0x00000011; + } + + static const ARMCPRegInfo cortexa8_cp_reginfo[] = { +@@ -2053,24 +2058,24 @@ static void cortex_a8_initfn(Object *obj) + set_feature(&cpu->env, ARM_FEATURE_EL3); + cpu->midr = 0x410fc080; + cpu->reset_fpsid = 0x410330c0; +- cpu->isar.mvfr0 = 0x11110222; +- cpu->isar.mvfr1 = 0x00011111; ++ cpu->isar.regs[MVFR0] = 0x11110222; ++ cpu->isar.regs[MVFR1] = 0x00011111; + cpu->ctr = 0x82048004; + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x1031; + cpu->id_pfr1 = 0x11; +- cpu->isar.id_dfr0 = 0x400; ++ cpu->isar.regs[ID_DFR0] = 0x400; + cpu->id_afr0 = 0; +- cpu->isar.id_mmfr0 = 0x31100003; +- cpu->isar.id_mmfr1 = 0x20000000; +- cpu->isar.id_mmfr2 = 0x01202000; +- cpu->isar.id_mmfr3 = 0x11; +- cpu->isar.id_isar0 = 0x00101111; +- cpu->isar.id_isar1 = 0x12112111; +- cpu->isar.id_isar2 = 0x21232031; +- cpu->isar.id_isar3 = 0x11112131; +- cpu->isar.id_isar4 = 0x00111142; +- cpu->isar.dbgdidr = 0x15141000; ++ cpu->isar.regs[ID_MMFR0] = 0x31100003; ++ cpu->isar.regs[ID_MMFR1] = 0x20000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01202000; ++ cpu->isar.regs[ID_MMFR3] = 0x11; ++ cpu->isar.regs[ID_ISAR0] = 0x00101111; ++ cpu->isar.regs[ID_ISAR1] = 0x12112111; ++ cpu->isar.regs[ID_ISAR2] = 0x21232031; ++ cpu->isar.regs[ID_ISAR3] = 0x11112131; ++ cpu->isar.regs[ID_ISAR4] = 0x00111142; ++ cpu->isar.regs[DBGDIDR] = 0x15141000; + cpu->clidr = (1 << 27) | (2 << 24) | 3; + cpu->ccsidr[0] = 0xe007e01a; /* 16k L1 dcache. 
*/ + cpu->ccsidr[1] = 0x2007e01a; /* 16k L1 icache. */ +@@ -2126,24 +2131,24 @@ static void cortex_a9_initfn(Object *obj) + set_feature(&cpu->env, ARM_FEATURE_CBAR); + cpu->midr = 0x410fc090; + cpu->reset_fpsid = 0x41033090; +- cpu->isar.mvfr0 = 0x11110222; +- cpu->isar.mvfr1 = 0x01111111; ++ cpu->isar.regs[MVFR0] = 0x11110222; ++ cpu->isar.regs[MVFR1] = 0x01111111; + cpu->ctr = 0x80038003; + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x1031; + cpu->id_pfr1 = 0x11; +- cpu->isar.id_dfr0 = 0x000; ++ cpu->isar.regs[ID_DFR0] = 0x000; + cpu->id_afr0 = 0; +- cpu->isar.id_mmfr0 = 0x00100103; +- cpu->isar.id_mmfr1 = 0x20000000; +- cpu->isar.id_mmfr2 = 0x01230000; +- cpu->isar.id_mmfr3 = 0x00002111; +- cpu->isar.id_isar0 = 0x00101111; +- cpu->isar.id_isar1 = 0x13112111; +- cpu->isar.id_isar2 = 0x21232041; +- cpu->isar.id_isar3 = 0x11112131; +- cpu->isar.id_isar4 = 0x00111142; +- cpu->isar.dbgdidr = 0x35141000; ++ cpu->isar.regs[ID_MMFR0] = 0x00100103; ++ cpu->isar.regs[ID_MMFR1] = 0x20000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01230000; ++ cpu->isar.regs[ID_MMFR3] = 0x00002111; ++ cpu->isar.regs[ID_ISAR0] = 0x00101111; ++ cpu->isar.regs[ID_ISAR1] = 0x13112111; ++ cpu->isar.regs[ID_ISAR2] = 0x21232041; ++ cpu->isar.regs[ID_ISAR3] = 0x11112131; ++ cpu->isar.regs[ID_ISAR4] = 0x00111142; ++ cpu->isar.regs[DBGDIDR] = 0x35141000; + cpu->clidr = (1 << 27) | (1 << 24) | 3; + cpu->ccsidr[0] = 0xe00fe019; /* 16k L1 dcache. */ + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ +@@ -2191,27 +2196,27 @@ static void cortex_a7_initfn(Object *obj) + cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7; + cpu->midr = 0x410fc075; + cpu->reset_fpsid = 0x41023075; +- cpu->isar.mvfr0 = 0x10110222; +- cpu->isar.mvfr1 = 0x11111111; ++ cpu->isar.regs[MVFR0] = 0x10110222; ++ cpu->isar.regs[MVFR1] = 0x11111111; + cpu->ctr = 0x84448003; + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x00001131; + cpu->id_pfr1 = 0x00011011; +- cpu->isar.id_dfr0 = 0x02010555; ++ cpu->isar.regs[ID_DFR0] = 0x02010555; + cpu->id_afr0 = 0x00000000; +- cpu->isar.id_mmfr0 = 0x10101105; +- cpu->isar.id_mmfr1 = 0x40000000; +- cpu->isar.id_mmfr2 = 0x01240000; +- cpu->isar.id_mmfr3 = 0x02102211; ++ cpu->isar.regs[ID_MMFR0] = 0x10101105; ++ cpu->isar.regs[ID_MMFR1] = 0x40000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01240000; ++ cpu->isar.regs[ID_MMFR3] = 0x02102211; + /* a7_mpcore_r0p5_trm, page 4-4 gives 0x01101110; but + * table 4-41 gives 0x02101110, which includes the arm div insns. + */ +- cpu->isar.id_isar0 = 0x02101110; +- cpu->isar.id_isar1 = 0x13112111; +- cpu->isar.id_isar2 = 0x21232041; +- cpu->isar.id_isar3 = 0x11112131; +- cpu->isar.id_isar4 = 0x10011142; +- cpu->isar.dbgdidr = 0x3515f005; ++ cpu->isar.regs[ID_ISAR0] = 0x02101110; ++ cpu->isar.regs[ID_ISAR1] = 0x13112111; ++ cpu->isar.regs[ID_ISAR2] = 0x21232041; ++ cpu->isar.regs[ID_ISAR3] = 0x11112131; ++ cpu->isar.regs[ID_ISAR4] = 0x10011142; ++ cpu->isar.regs[DBGDIDR] = 0x3515f005; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ +@@ -2237,24 +2242,24 @@ static void cortex_a15_initfn(Object *obj) + cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A15; + cpu->midr = 0x412fc0f1; + cpu->reset_fpsid = 0x410430f0; +- cpu->isar.mvfr0 = 0x10110222; +- cpu->isar.mvfr1 = 0x11111111; ++ cpu->isar.regs[MVFR0] = 0x10110222; ++ cpu->isar.regs[MVFR1] = 0x11111111; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x00001131; + 
cpu->id_pfr1 = 0x00011011; +- cpu->isar.id_dfr0 = 0x02010555; ++ cpu->isar.regs[ID_DFR0] = 0x02010555; + cpu->id_afr0 = 0x00000000; +- cpu->isar.id_mmfr0 = 0x10201105; +- cpu->isar.id_mmfr1 = 0x20000000; +- cpu->isar.id_mmfr2 = 0x01240000; +- cpu->isar.id_mmfr3 = 0x02102211; +- cpu->isar.id_isar0 = 0x02101110; +- cpu->isar.id_isar1 = 0x13112111; +- cpu->isar.id_isar2 = 0x21232041; +- cpu->isar.id_isar3 = 0x11112131; +- cpu->isar.id_isar4 = 0x10011142; +- cpu->isar.dbgdidr = 0x3515f021; ++ cpu->isar.regs[ID_MMFR0] = 0x10201105; ++ cpu->isar.regs[ID_MMFR1] = 0x20000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01240000; ++ cpu->isar.regs[ID_MMFR3] = 0x02102211; ++ cpu->isar.regs[ID_ISAR0] = 0x02101110; ++ cpu->isar.regs[ID_ISAR1] = 0x13112111; ++ cpu->isar.regs[ID_ISAR2] = 0x21232041; ++ cpu->isar.regs[ID_ISAR3] = 0x11112131; ++ cpu->isar.regs[ID_ISAR4] = 0x10011142; ++ cpu->isar.regs[DBGDIDR] = 0x3515f021; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ +@@ -2447,7 +2452,8 @@ static void arm_max_initfn(Object *obj) + cortex_a15_initfn(obj); + + /* old-style VFP short-vector support */ +- cpu->isar.mvfr0 = FIELD_DP32(cpu->isar.mvfr0, MVFR0, FPSHVEC, 1); ++ cpu->isar.regs[MVFR0] = FIELD_DP32(cpu->isar.regs[MVFR0], MVFR0, ++ FPSHVEC, 1); + + #ifdef CONFIG_USER_ONLY + /* We don't set these in system emulation mode for the moment, +@@ -2458,39 +2464,39 @@ static void arm_max_initfn(Object *obj) + { + uint32_t t; + +- t = cpu->isar.id_isar5; ++ t = cpu->isar.regs[ID_ISAR5]; + t = FIELD_DP32(t, ID_ISAR5, AES, 2); + t = FIELD_DP32(t, ID_ISAR5, SHA1, 1); + t = FIELD_DP32(t, ID_ISAR5, SHA2, 1); + t = FIELD_DP32(t, ID_ISAR5, CRC32, 1); + t = FIELD_DP32(t, ID_ISAR5, RDM, 1); + t = FIELD_DP32(t, ID_ISAR5, VCMA, 1); +- cpu->isar.id_isar5 = t; ++ cpu->isar.regs[ID_ISAR5] = t; + +- t = cpu->isar.id_isar6; ++ t = cpu->isar.regs[ID_ISAR6]; + t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1); + t = FIELD_DP32(t, ID_ISAR6, DP, 
1); + t = FIELD_DP32(t, ID_ISAR6, FHM, 1); + t = FIELD_DP32(t, ID_ISAR6, SB, 1); + t = FIELD_DP32(t, ID_ISAR6, SPECRES, 1); +- cpu->isar.id_isar6 = t; ++ cpu->isar.regs[ID_ISAR6] = t; + +- t = cpu->isar.mvfr1; ++ t = cpu->isar.regs[MVFR1]; + t = FIELD_DP32(t, MVFR1, FPHP, 2); /* v8.0 FP support */ +- cpu->isar.mvfr1 = t; ++ cpu->isar.regs[MVFR1] = t; + +- t = cpu->isar.mvfr2; ++ t = cpu->isar.regs[MVFR2]; + t = FIELD_DP32(t, MVFR2, SIMDMISC, 3); /* SIMD MaxNum */ + t = FIELD_DP32(t, MVFR2, FPMISC, 4); /* FP MaxNum */ +- cpu->isar.mvfr2 = t; ++ cpu->isar.regs[MVFR2] = t; + +- t = cpu->isar.id_mmfr3; ++ t = cpu->isar.regs[ID_MMFR3]; + t = FIELD_DP32(t, ID_MMFR3, PAN, 2); /* ATS1E1 */ +- cpu->isar.id_mmfr3 = t; ++ cpu->isar.regs[ID_MMFR3] = t; + +- t = cpu->isar.id_mmfr4; ++ t = cpu->isar.regs[ID_MMFR4]; + t = FIELD_DP32(t, ID_MMFR4, HPDS, 1); /* AA32HPD */ +- cpu->isar.id_mmfr4 = t; ++ cpu->isar.regs[ID_MMFR4] = t; + } + #endif + } +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 56d8cd8c..7bb481fb 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -63,6 +63,37 @@ + #define ARMV7M_EXCP_PENDSV 14 + #define ARMV7M_EXCP_SYSTICK 15 + ++typedef enum CPUIDReg { ++ MIDR_EL1, ++ ID_ISAR0, ++ ID_ISAR1, ++ ID_ISAR2, ++ ID_ISAR3, ++ ID_ISAR4, ++ ID_ISAR5, ++ ID_ISAR6, ++ ID_MMFR0, ++ ID_MMFR1, ++ ID_MMFR2, ++ ID_MMFR3, ++ ID_MMFR4, ++ ID_AA64ISAR0, ++ ID_AA64ISAR1, ++ ID_AA64PFR0, ++ ID_AA64PFR1, ++ ID_AA64MMFR0, ++ ID_AA64MMFR1, ++ ID_AA64MMFR2, ++ ID_AA64DFR0, ++ ID_AA64DFR1, ++ ID_DFR0, ++ MVFR0, ++ MVFR1, ++ MVFR2, ++ DBGDIDR, ++ ID_MAX, ++} CPUIDReg; ++ + /* For M profile, some registers are banked secure vs non-secure; + * these are represented as a 2-element array where the first element + * is the non-secure copy and the second is the secure copy. +@@ -855,32 +886,7 @@ struct ARMCPU { + * field by reading the value from the KVM vCPU. 
+ */ + struct ARMISARegisters { +- uint32_t id_isar0; +- uint32_t id_isar1; +- uint32_t id_isar2; +- uint32_t id_isar3; +- uint32_t id_isar4; +- uint32_t id_isar5; +- uint32_t id_isar6; +- uint32_t id_mmfr0; +- uint32_t id_mmfr1; +- uint32_t id_mmfr2; +- uint32_t id_mmfr3; +- uint32_t id_mmfr4; +- uint32_t mvfr0; +- uint32_t mvfr1; +- uint32_t mvfr2; +- uint32_t id_dfr0; +- uint32_t dbgdidr; +- uint64_t id_aa64isar0; +- uint64_t id_aa64isar1; +- uint64_t id_aa64pfr0; +- uint64_t id_aa64pfr1; +- uint64_t id_aa64mmfr0; +- uint64_t id_aa64mmfr1; +- uint64_t id_aa64mmfr2; +- uint64_t id_aa64dfr0; +- uint64_t id_aa64dfr1; ++ uint64_t regs[ID_MAX]; + } isar; + uint32_t midr; + uint32_t revidr; +@@ -3358,77 +3364,77 @@ extern const uint64_t pred_esz_masks[4]; + */ + static inline bool isar_feature_thumb_div(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR0], ID_ISAR0, DIVIDE) != 0; + } + + static inline bool isar_feature_arm_div(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar0, ID_ISAR0, DIVIDE) > 1; ++ return FIELD_EX32(id->regs[ID_ISAR0], ID_ISAR0, DIVIDE) > 1; + } + + static inline bool isar_feature_jazelle(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar1, ID_ISAR1, JAZELLE) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR1], ID_ISAR1, JAZELLE) != 0; + } + + static inline bool isar_feature_aa32_aes(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, AES) != 0; + } + + static inline bool isar_feature_aa32_pmull(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar5, ID_ISAR5, AES) > 1; ++ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, AES) > 1; + } + + static inline bool isar_feature_aa32_sha1(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA1) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, SHA1) != 0; + } + + static inline bool 
isar_feature_aa32_sha2(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar5, ID_ISAR5, SHA2) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, SHA2) != 0; + } + + static inline bool isar_feature_aa32_crc32(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar5, ID_ISAR5, CRC32) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, CRC32) != 0; + } + + static inline bool isar_feature_aa32_rdm(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar5, ID_ISAR5, RDM) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, RDM) != 0; + } + + static inline bool isar_feature_aa32_vcma(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR5], ID_ISAR5, VCMA) != 0; + } + + static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, JSCVT) != 0; + } + + static inline bool isar_feature_aa32_dp(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, DP) != 0; + } + + static inline bool isar_feature_aa32_fhm(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar6, ID_ISAR6, FHM) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, FHM) != 0; + } + + static inline bool isar_feature_aa32_sb(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar6, ID_ISAR6, SB) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, SB) != 0; + } + + static inline bool isar_feature_aa32_predinv(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_isar6, ID_ISAR6, SPECRES) != 0; ++ return FIELD_EX32(id->regs[ID_ISAR6], ID_ISAR6, SPECRES) != 0; + } + + static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) +@@ -3438,24 +3444,24 @@ static inline bool isar_feature_aa32_fp16_arith(const ARMISARegisters *id) + * the ARMv8.2-FP16 extension is 
implemented for aa32 mode. + * At which point we can properly set and check MVFR1.FPHP. + */ +- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; ++ return FIELD_EX64(id->regs[ID_AA64PFR0], ID_AA64PFR0, FP) == 1; + } + + static inline bool isar_feature_aa32_fp_d32(const ARMISARegisters *id) + { + /* Return true if D16-D31 are implemented */ +- return FIELD_EX64(id->mvfr0, MVFR0, SIMDREG) >= 2; ++ return FIELD_EX64(id->regs[MVFR0], MVFR0, SIMDREG) >= 2; + } + + static inline bool isar_feature_aa32_fpshvec(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr0, MVFR0, FPSHVEC) > 0; ++ return FIELD_EX64(id->regs[MVFR0], MVFR0, FPSHVEC) > 0; + } + + static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id) + { + /* Return true if CPU supports double precision floating point */ +- return FIELD_EX64(id->mvfr0, MVFR0, FPDP) > 0; ++ return FIELD_EX64(id->regs[MVFR0], MVFR0, FPDP) > 0; + } + + /* +@@ -3465,49 +3471,49 @@ static inline bool isar_feature_aa32_fpdp(const ARMISARegisters *id) + */ + static inline bool isar_feature_aa32_fp16_spconv(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 0; ++ return FIELD_EX64(id->regs[MVFR1], MVFR1, FPHP) > 0; + } + + static inline bool isar_feature_aa32_fp16_dpconv(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr1, MVFR1, FPHP) > 1; ++ return FIELD_EX64(id->regs[MVFR1], MVFR1, FPHP) > 1; + } + + static inline bool isar_feature_aa32_vsel(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 1; ++ return FIELD_EX64(id->regs[MVFR2], MVFR2, FPMISC) >= 1; + } + + static inline bool isar_feature_aa32_vcvt_dr(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 2; ++ return FIELD_EX64(id->regs[MVFR2], MVFR2, FPMISC) >= 2; + } + + static inline bool isar_feature_aa32_vrint(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 3; ++ return FIELD_EX64(id->regs[MVFR2], MVFR2, FPMISC) >= 3; + } + + 
static inline bool isar_feature_aa32_vminmaxnm(const ARMISARegisters *id) + { +- return FIELD_EX64(id->mvfr2, MVFR2, FPMISC) >= 4; ++ return FIELD_EX64(id->regs[MVFR2], MVFR2, FPMISC) >= 4; + } + + static inline bool isar_feature_aa32_pan(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) != 0; ++ return FIELD_EX32(id->regs[ID_MMFR3], ID_MMFR3, PAN) != 0; + } + + static inline bool isar_feature_aa32_ats1e1(const ARMISARegisters *id) + { +- return FIELD_EX32(id->id_mmfr3, ID_MMFR3, PAN) >= 2; ++ return FIELD_EX32(id->regs[ID_MMFR3], ID_MMFR3, PAN) >= 2; + } + + static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) + { + /* 0xf means "non-standard IMPDEF PMU" */ +- return FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) >= 4 && +- FIELD_EX32(id->id_dfr0, ID_DFR0, PERFMON) != 0xf; ++ return FIELD_EX32(id->regs[ID_DFR0], ID_DFR0, PERFMON) >= 4 && ++ FIELD_EX32(id->regs[ID_DFR0], ID_DFR0, PERFMON) != 0xf; + } + + /* +@@ -3515,92 +3521,92 @@ static inline bool isar_feature_aa32_pmu_8_1(const ARMISARegisters *id) + */ + static inline bool isar_feature_aa64_aes(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, AES) != 0; + } + + static inline bool isar_feature_aa64_pmull(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, AES) > 1; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, AES) > 1; + } + + static inline bool isar_feature_aa64_sha1(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA1) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SHA1) != 0; + } + + static inline bool isar_feature_aa64_sha256(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SHA2) != 0; + } + + static inline bool isar_feature_aa64_sha512(const ARMISARegisters 
*id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA2) > 1; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SHA2) > 1; + } + + static inline bool isar_feature_aa64_crc32(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, CRC32) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, CRC32) != 0; + } + + static inline bool isar_feature_aa64_atomics(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, ATOMIC) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, ATOMIC) != 0; + } + + static inline bool isar_feature_aa64_rdm(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RDM) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, RDM) != 0; + } + + static inline bool isar_feature_aa64_sha3(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SHA3) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SHA3) != 0; + } + + static inline bool isar_feature_aa64_sm3(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM3) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SM3) != 0; + } + + static inline bool isar_feature_aa64_sm4(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, SM4) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, SM4) != 0; + } + + static inline bool isar_feature_aa64_dp(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, DP) != 0; + } + + static inline bool isar_feature_aa64_fhm(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, FHM) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, FHM) != 0; + } + + static inline bool isar_feature_aa64_condm_4(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, 
ID_AA64ISAR0, TS) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, TS) != 0; + } + + static inline bool isar_feature_aa64_condm_5(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, TS) >= 2; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, TS) >= 2; + } + + static inline bool isar_feature_aa64_rndr(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, RNDR) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR0], ID_AA64ISAR0, RNDR) != 0; + } + + static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, JSCVT) != 0; + } + + static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, FCMA) != 0; + } + + static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) +@@ -3611,7 +3617,7 @@ static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) + * defined algorithms, and thus API+GPI, and this predicate controls + * migration of the 128-bit keys. 
+ */ +- return (id->id_aa64isar1 & ++ return (id->regs[ID_AA64ISAR1] & + (FIELD_DP64(0, ID_AA64ISAR1, APA, 0xf) | + FIELD_DP64(0, ID_AA64ISAR1, API, 0xf) | + FIELD_DP64(0, ID_AA64ISAR1, GPA, 0xf) | +@@ -3620,59 +3626,59 @@ static inline bool isar_feature_aa64_pauth(const ARMISARegisters *id) + + static inline bool isar_feature_aa64_sb(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SB) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, SB) != 0; + } + + static inline bool isar_feature_aa64_predinv(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, SPECRES) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, SPECRES) != 0; + } + + static inline bool isar_feature_aa64_frint(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FRINTTS) != 0; ++ return FIELD_EX64(id->regs[ID_AA64ISAR1], ID_AA64ISAR1, FRINTTS) != 0; + } + + static inline bool isar_feature_aa64_fp16(const ARMISARegisters *id) + { + /* We always set the AdvSIMD and FP fields identically wrt FP16. 
*/ +- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, FP) == 1; ++ return FIELD_EX64(id->regs[ID_AA64PFR0], ID_AA64PFR0, FP) == 1; + } + + static inline bool isar_feature_aa64_aa32(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, EL0) >= 2; ++ return FIELD_EX64(id->regs[ID_AA64PFR0], ID_AA64PFR0, EL0) >= 2; + } + + static inline bool isar_feature_aa64_sve(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, SVE) != 0; ++ return FIELD_EX64(id->regs[ID_AA64PFR0], ID_AA64PFR0, SVE) != 0; + } + + static inline bool isar_feature_aa64_lor(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, LO) != 0; ++ return FIELD_EX64(id->regs[ID_AA64MMFR1], ID_AA64MMFR1, LO) != 0; + } + + static inline bool isar_feature_aa64_pan(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) != 0; ++ return FIELD_EX64(id->regs[ID_AA64MMFR1], ID_AA64MMFR1, PAN) != 0; + } + + static inline bool isar_feature_aa64_ats1e1(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, PAN) >= 2; ++ return FIELD_EX64(id->regs[ID_AA64MMFR1], ID_AA64MMFR1, PAN) >= 2; + } + + static inline bool isar_feature_aa64_bti(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0; ++ return FIELD_EX64(id->regs[ID_AA64PFR1], ID_AA64PFR1, BT) != 0; + } + + static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id) + { +- return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 && +- FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; ++ return FIELD_EX64(id->regs[ID_AA64DFR0], ID_AA64DFR0, PMUVER) >= 4 && ++ FIELD_EX64(id->regs[ID_AA64DFR0], ID_AA64DFR0, PMUVER) != 0xf; + } + + static inline bool isar_feature_any_pmu_8_1(const ARMISARegisters *id) +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index d450b8c8..fe648752 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -116,31 +116,31 @@ 
static void aarch64_a57_initfn(Object *obj) + cpu->midr = 0x411fd070; + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034070; +- cpu->isar.mvfr0 = 0x10110222; +- cpu->isar.mvfr1 = 0x12111111; +- cpu->isar.mvfr2 = 0x00000043; ++ cpu->isar.regs[MVFR0] = 0x10110222; ++ cpu->isar.regs[MVFR1] = 0x12111111; ++ cpu->isar.regs[MVFR2] = 0x00000043; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; +- cpu->isar.id_dfr0 = 0x03010066; ++ cpu->isar.regs[ID_DFR0] = 0x03010066; + cpu->id_afr0 = 0x00000000; +- cpu->isar.id_mmfr0 = 0x10101105; +- cpu->isar.id_mmfr1 = 0x40000000; +- cpu->isar.id_mmfr2 = 0x01260000; +- cpu->isar.id_mmfr3 = 0x02102211; +- cpu->isar.id_isar0 = 0x02101110; +- cpu->isar.id_isar1 = 0x13112111; +- cpu->isar.id_isar2 = 0x21232042; +- cpu->isar.id_isar3 = 0x01112131; +- cpu->isar.id_isar4 = 0x00011142; +- cpu->isar.id_isar5 = 0x00011121; +- cpu->isar.id_isar6 = 0; +- cpu->isar.id_aa64pfr0 = 0x00002222; +- cpu->isar.id_aa64dfr0 = 0x10305106; +- cpu->isar.id_aa64isar0 = 0x00011120; +- cpu->isar.id_aa64mmfr0 = 0x00001124; +- cpu->isar.dbgdidr = 0x3516d000; ++ cpu->isar.regs[ID_MMFR0] = 0x10101105; ++ cpu->isar.regs[ID_MMFR1] = 0x40000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01260000; ++ cpu->isar.regs[ID_MMFR3] = 0x02102211; ++ cpu->isar.regs[ID_ISAR0] = 0x02101110; ++ cpu->isar.regs[ID_ISAR1] = 0x13112111; ++ cpu->isar.regs[ID_ISAR2] = 0x21232042; ++ cpu->isar.regs[ID_ISAR3] = 0x01112131; ++ cpu->isar.regs[ID_ISAR4] = 0x00011142; ++ cpu->isar.regs[ID_ISAR5] = 0x00011121; ++ cpu->isar.regs[ID_ISAR6] = 0; ++ cpu->isar.regs[ID_AA64PFR0] = 0x00002222; ++ cpu->isar.regs[ID_AA64DFR0] = 0x10305106; ++ cpu->isar.regs[ID_AA64ISAR0] = 0x00011120; ++ cpu->isar.regs[ID_AA64MMFR0] = 0x00001124; ++ cpu->isar.regs[DBGDIDR] = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ +@@ -170,31 +170,31 @@ static void 
aarch64_a53_initfn(Object *obj) + cpu->midr = 0x410fd034; + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034070; +- cpu->isar.mvfr0 = 0x10110222; +- cpu->isar.mvfr1 = 0x12111111; +- cpu->isar.mvfr2 = 0x00000043; ++ cpu->isar.regs[MVFR0] = 0x10110222; ++ cpu->isar.regs[MVFR1] = 0x12111111; ++ cpu->isar.regs[MVFR2] = 0x00000043; + cpu->ctr = 0x84448004; /* L1Ip = VIPT */ + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; +- cpu->isar.id_dfr0 = 0x03010066; ++ cpu->isar.regs[ID_DFR0] = 0x03010066; + cpu->id_afr0 = 0x00000000; +- cpu->isar.id_mmfr0 = 0x10101105; +- cpu->isar.id_mmfr1 = 0x40000000; +- cpu->isar.id_mmfr2 = 0x01260000; +- cpu->isar.id_mmfr3 = 0x02102211; +- cpu->isar.id_isar0 = 0x02101110; +- cpu->isar.id_isar1 = 0x13112111; +- cpu->isar.id_isar2 = 0x21232042; +- cpu->isar.id_isar3 = 0x01112131; +- cpu->isar.id_isar4 = 0x00011142; +- cpu->isar.id_isar5 = 0x00011121; +- cpu->isar.id_isar6 = 0; +- cpu->isar.id_aa64pfr0 = 0x00002222; +- cpu->isar.id_aa64dfr0 = 0x10305106; +- cpu->isar.id_aa64isar0 = 0x00011120; +- cpu->isar.id_aa64mmfr0 = 0x00001122; /* 40 bit physical addr */ +- cpu->isar.dbgdidr = 0x3516d000; ++ cpu->isar.regs[ID_MMFR0] = 0x10101105; ++ cpu->isar.regs[ID_MMFR1] = 0x40000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01260000; ++ cpu->isar.regs[ID_MMFR3] = 0x02102211; ++ cpu->isar.regs[ID_ISAR0] = 0x02101110; ++ cpu->isar.regs[ID_ISAR1] = 0x13112111; ++ cpu->isar.regs[ID_ISAR2] = 0x21232042; ++ cpu->isar.regs[ID_ISAR3] = 0x01112131; ++ cpu->isar.regs[ID_ISAR4] = 0x00011142; ++ cpu->isar.regs[ID_ISAR5] = 0x00011121; ++ cpu->isar.regs[ID_ISAR6] = 0; ++ cpu->isar.regs[ID_AA64PFR0] = 0x00002222; ++ cpu->isar.regs[ID_AA64DFR0] = 0x10305106; ++ cpu->isar.regs[ID_AA64ISAR0] = 0x00011120; ++ cpu->isar.regs[ID_AA64MMFR0] = 0x00001122; /* 40 bit physical addr */ ++ cpu->isar.regs[DBGDIDR] = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x700fe01a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 
32KB L1 icache */ +@@ -224,30 +224,30 @@ static void aarch64_a72_initfn(Object *obj) + cpu->midr = 0x410fd083; + cpu->revidr = 0x00000000; + cpu->reset_fpsid = 0x41034080; +- cpu->isar.mvfr0 = 0x10110222; +- cpu->isar.mvfr1 = 0x12111111; +- cpu->isar.mvfr2 = 0x00000043; ++ cpu->isar.regs[MVFR0] = 0x10110222; ++ cpu->isar.regs[MVFR1] = 0x12111111; ++ cpu->isar.regs[MVFR2] = 0x00000043; + cpu->ctr = 0x8444c004; + cpu->reset_sctlr = 0x00c50838; + cpu->id_pfr0 = 0x00000131; + cpu->id_pfr1 = 0x00011011; +- cpu->isar.id_dfr0 = 0x03010066; ++ cpu->isar.regs[ID_DFR0] = 0x03010066; + cpu->id_afr0 = 0x00000000; +- cpu->isar.id_mmfr0 = 0x10201105; +- cpu->isar.id_mmfr1 = 0x40000000; +- cpu->isar.id_mmfr2 = 0x01260000; +- cpu->isar.id_mmfr3 = 0x02102211; +- cpu->isar.id_isar0 = 0x02101110; +- cpu->isar.id_isar1 = 0x13112111; +- cpu->isar.id_isar2 = 0x21232042; +- cpu->isar.id_isar3 = 0x01112131; +- cpu->isar.id_isar4 = 0x00011142; +- cpu->isar.id_isar5 = 0x00011121; +- cpu->isar.id_aa64pfr0 = 0x00002222; +- cpu->isar.id_aa64dfr0 = 0x10305106; +- cpu->isar.id_aa64isar0 = 0x00011120; +- cpu->isar.id_aa64mmfr0 = 0x00001124; +- cpu->isar.dbgdidr = 0x3516d000; ++ cpu->isar.regs[ID_MMFR0] = 0x10201105; ++ cpu->isar.regs[ID_MMFR1] = 0x40000000; ++ cpu->isar.regs[ID_MMFR2] = 0x01260000; ++ cpu->isar.regs[ID_MMFR3] = 0x02102211; ++ cpu->isar.regs[ID_ISAR0] = 0x02101110; ++ cpu->isar.regs[ID_ISAR1] = 0x13112111; ++ cpu->isar.regs[ID_ISAR2] = 0x21232042; ++ cpu->isar.regs[ID_ISAR3] = 0x01112131; ++ cpu->isar.regs[ID_ISAR4] = 0x00011142; ++ cpu->isar.regs[ID_ISAR5] = 0x00011121; ++ cpu->isar.regs[ID_AA64PFR0] = 0x00002222; ++ cpu->isar.regs[ID_AA64DFR0] = 0x10305106; ++ cpu->isar.regs[ID_AA64ISAR0] = 0x00011120; ++ cpu->isar.regs[ID_AA64MMFR0] = 0x00001124; ++ cpu->isar.regs[DBGDIDR] = 0x3516d000; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ + cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ +@@ -275,10 +275,10 @@ static void 
aarch64_kunpeng_920_initfn(Object *obj) + + cpu->midr = 0x480fd010; + cpu->ctr = 0x84448004; +- cpu->isar.id_aa64pfr0 = 0x11001111; +- cpu->isar.id_aa64dfr0 = 0x110305408; +- cpu->isar.id_aa64isar0 = 0x10211120; +- cpu->isar.id_aa64mmfr0 = 0x101125; ++ cpu->isar.regs[ID_AA64PFR0] = 0x11001111; ++ cpu->isar.regs[ID_AA64DFR0] = 0x110305408; ++ cpu->isar.regs[ID_AA64ISAR0] = 0x10211120; ++ cpu->isar.regs[ID_AA64MMFR0] = 0x101125; + } + + static void cpu_max_get_sve_vq(Object *obj, Visitor *v, const char *name, +@@ -321,7 +321,7 @@ static void aarch64_max_initfn(Object *obj) + uint32_t u; + aarch64_a57_initfn(obj); + +- t = cpu->isar.id_aa64isar0; ++ t = cpu->isar.regs[ID_AA64ISAR0]; + t = FIELD_DP64(t, ID_AA64ISAR0, AES, 2); /* AES + PMULL */ + t = FIELD_DP64(t, ID_AA64ISAR0, SHA1, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, SHA2, 2); /* SHA512 */ +@@ -335,9 +335,9 @@ static void aarch64_max_initfn(Object *obj) + t = FIELD_DP64(t, ID_AA64ISAR0, FHM, 1); + t = FIELD_DP64(t, ID_AA64ISAR0, TS, 2); /* v8.5-CondM */ + t = FIELD_DP64(t, ID_AA64ISAR0, RNDR, 1); +- cpu->isar.id_aa64isar0 = t; ++ cpu->isar.regs[ID_AA64ISAR0] = t; + +- t = cpu->isar.id_aa64isar1; ++ t = cpu->isar.regs[ID_AA64ISAR1]; + t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1); + t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1); + t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */ +@@ -347,45 +347,45 @@ static void aarch64_max_initfn(Object *obj) + t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); + t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); + t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); +- cpu->isar.id_aa64isar1 = t; ++ cpu->isar.regs[ID_AA64ISAR1] = t; + +- t = cpu->isar.id_aa64pfr0; ++ t = cpu->isar.regs[ID_AA64PFR0]; + t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); + t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); + t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); +- cpu->isar.id_aa64pfr0 = t; ++ cpu->isar.regs[ID_AA64PFR0] = t; + +- t = cpu->isar.id_aa64pfr1; ++ t = cpu->isar.regs[ID_AA64PFR1]; + t = FIELD_DP64(t, ID_AA64PFR1, 
BT, 1); +- cpu->isar.id_aa64pfr1 = t; ++ cpu->isar.regs[ID_AA64PFR1] = t; + +- t = cpu->isar.id_aa64mmfr1; ++ t = cpu->isar.regs[ID_AA64MMFR1]; + t = FIELD_DP64(t, ID_AA64MMFR1, HPDS, 1); /* HPD */ + t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1); + t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 2); /* ATS1E1 */ +- cpu->isar.id_aa64mmfr1 = t; ++ cpu->isar.regs[ID_AA64MMFR1] = t; + + /* Replicate the same data to the 32-bit id registers. */ +- u = cpu->isar.id_isar5; ++ u = cpu->isar.regs[ID_ISAR5]; + u = FIELD_DP32(u, ID_ISAR5, AES, 2); /* AES + PMULL */ + u = FIELD_DP32(u, ID_ISAR5, SHA1, 1); + u = FIELD_DP32(u, ID_ISAR5, SHA2, 1); + u = FIELD_DP32(u, ID_ISAR5, CRC32, 1); + u = FIELD_DP32(u, ID_ISAR5, RDM, 1); + u = FIELD_DP32(u, ID_ISAR5, VCMA, 1); +- cpu->isar.id_isar5 = u; ++ cpu->isar.regs[ID_ISAR5] = u; + +- u = cpu->isar.id_isar6; ++ u = cpu->isar.regs[ID_ISAR6]; + u = FIELD_DP32(u, ID_ISAR6, JSCVT, 1); + u = FIELD_DP32(u, ID_ISAR6, DP, 1); + u = FIELD_DP32(u, ID_ISAR6, FHM, 1); + u = FIELD_DP32(u, ID_ISAR6, SB, 1); + u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1); +- cpu->isar.id_isar6 = u; ++ cpu->isar.regs[ID_ISAR6] = u; + +- u = cpu->isar.id_mmfr3; ++ u = cpu->isar.regs[ID_MMFR3]; + u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */ +- cpu->isar.id_mmfr3 = u; ++ cpu->isar.regs[ID_MMFR3] = u; + + /* + * FIXME: We do not yet support ARMv8.2-fp16 for AArch32 yet, +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 49cd7a7e..459af431 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -5597,7 +5597,7 @@ static void define_debug_regs(ARMCPU *cpu) + ARMCPRegInfo dbgdidr = { + .name = "DBGDIDR", .cp = 14, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 0, + .access = PL0_R, .accessfn = access_tda, +- .type = ARM_CP_CONST, .resetvalue = cpu->isar.dbgdidr, ++ .type = ARM_CP_CONST, .resetvalue = cpu->isar.regs[DBGDIDR], + }; + + /* Note that all these register fields hold "number of Xs minus 1". 
*/ +@@ -5672,7 +5672,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) + static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri) + { + ARMCPU *cpu = env_archcpu(env); +- uint64_t pfr0 = cpu->isar.id_aa64pfr0; ++ uint64_t pfr0 = cpu->isar.regs[ID_AA64PFR0]; + + if (env->gicv3state) { + pfr0 |= 1 << 24; +@@ -5898,7 +5898,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_DFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_dfr0 }, ++ .resetvalue = cpu->isar.regs[ID_DFR0] }, + { .name = "ID_AFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, +@@ -5906,51 +5906,51 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_MMFR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 4, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_mmfr0 }, ++ .resetvalue = cpu->isar.regs[ID_MMFR0] }, + { .name = "ID_MMFR1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 5, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_mmfr1 }, ++ .resetvalue = cpu->isar.regs[ID_MMFR1] }, + { .name = "ID_MMFR2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 6, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_mmfr2 }, ++ .resetvalue = cpu->isar.regs[ID_MMFR2] }, + { .name = "ID_MMFR3", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 1, .opc2 = 7, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_mmfr3 }, ++ .resetvalue = cpu->isar.regs[ID_MMFR3] }, + { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_isar0 }, ++ .resetvalue = 
cpu->isar.regs[ID_ISAR0] }, + { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_isar1 }, ++ .resetvalue = cpu->isar.regs[ID_ISAR1] }, + { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_isar2 }, ++ .resetvalue = cpu->isar.regs[ID_ISAR2] }, + { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_isar3 }, ++ .resetvalue = cpu->isar.regs[ID_ISAR3] }, + { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_isar4 }, ++ .resetvalue = cpu->isar.regs[ID_ISAR4] }, + { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_isar5 }, ++ .resetvalue = cpu->isar.regs[ID_ISAR5] }, + { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_mmfr4 }, ++ .resetvalue = cpu->isar.regs[ID_MMFR4] }, + { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_isar6 }, ++ .resetvalue = cpu->isar.regs[ID_ISAR6] }, + REGINFO_SENTINEL + }; + define_arm_cp_regs(cpu, v6_idregs); +@@ -6074,7 +6074,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_aa64pfr1}, ++ .resetvalue = 
cpu->isar.regs[ID_AA64PFR1]}, + { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +@@ -6103,11 +6103,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_AA64DFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_aa64dfr0 }, ++ .resetvalue = cpu->isar.regs[ID_AA64DFR0] }, + { .name = "ID_AA64DFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_aa64dfr1 }, ++ .resetvalue = cpu->isar.regs[ID_AA64DFR1] }, + { .name = "ID_AA64DFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +@@ -6135,11 +6135,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_aa64isar0 }, ++ .resetvalue = cpu->isar.regs[ID_AA64ISAR0] }, + { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_aa64isar1 }, ++ .resetvalue = cpu->isar.regs[ID_AA64ISAR1] }, + { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +@@ -6167,15 +6167,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "ID_AA64MMFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_aa64mmfr0 }, ++ .resetvalue = cpu->isar.regs[ID_AA64MMFR0] }, + { .name = 
"ID_AA64MMFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_aa64mmfr1 }, ++ .resetvalue = cpu->isar.regs[ID_AA64MMFR1] }, + { .name = "ID_AA64MMFR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.id_aa64mmfr2 }, ++ .resetvalue = cpu->isar.regs[ID_AA64MMFR2] }, + { .name = "ID_AA64MMFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, +@@ -6199,15 +6199,15 @@ void register_cp_regs_for_features(ARMCPU *cpu) + { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.mvfr0 }, ++ .resetvalue = cpu->isar.regs[MVFR0] }, + { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.mvfr1 }, ++ .resetvalue = cpu->isar.regs[MVFR1] }, + { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, + .access = PL1_R, .type = ARM_CP_CONST, +- .resetvalue = cpu->isar.mvfr2 }, ++ .resetvalue = cpu->isar.regs[MVFR2] }, + { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3, + .access = PL1_R, .type = ARM_CP_CONST, +@@ -6426,7 +6426,7 @@ void register_cp_regs_for_features(ARMCPU *cpu) + define_arm_cp_regs(cpu, vmsa_pmsa_cp_reginfo); + define_arm_cp_regs(cpu, vmsa_cp_reginfo); + /* TTCBR2 is introduced with ARMv8.2-A32HPD. 
*/ +- if (FIELD_EX32(cpu->isar.id_mmfr4, ID_MMFR4, HPDS) != 0) { ++ if (FIELD_EX32(cpu->isar.regs[ID_MMFR4], ID_MMFR4, HPDS) != 0) { + define_one_arm_cp_reg(cpu, &ttbcr2_reginfo); + } + } +diff --git a/target/arm/internals.h b/target/arm/internals.h +index 1d01ecc4..2da13ba8 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -237,7 +237,7 @@ static inline unsigned int arm_pamax(ARMCPU *cpu) + [5] = 48, + }; + unsigned int parange = +- FIELD_EX64(cpu->isar.id_aa64mmfr0, ID_AA64MMFR0, PARANGE); ++ FIELD_EX64(cpu->isar.regs[ID_AA64MMFR0], ID_AA64MMFR0, PARANGE); + + /* id_aa64mmfr0 is a read-only register so values outside of the + * supported mappings can be considered an implementation error. */ +@@ -865,9 +865,9 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env) + static inline int arm_num_brps(ARMCPU *cpu) + { + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { +- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1; ++ return FIELD_EX64(cpu->isar.regs[ID_AA64DFR0], ID_AA64DFR0, BRPS) + 1; + } else { +- return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, BRPS) + 1; ++ return FIELD_EX32(cpu->isar.regs[DBGDIDR], DBGDIDR, BRPS) + 1; + } + } + +@@ -879,9 +879,9 @@ static inline int arm_num_brps(ARMCPU *cpu) + static inline int arm_num_wrps(ARMCPU *cpu) + { + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { +- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1; ++ return FIELD_EX64(cpu->isar.regs[ID_AA64DFR0], ID_AA64DFR0, WRPS) + 1; + } else { +- return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, WRPS) + 1; ++ return FIELD_EX32(cpu->isar.regs[DBGDIDR], DBGDIDR, WRPS) + 1; + } + } + +@@ -893,9 +893,10 @@ static inline int arm_num_wrps(ARMCPU *cpu) + static inline int arm_num_ctx_cmps(ARMCPU *cpu) + { + if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { +- return FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS) + 1; ++ return FIELD_EX64(cpu->isar.regs[ID_AA64DFR0], ID_AA64DFR0, ++ CTX_CMPS) + 1; + } else { +- 
return FIELD_EX32(cpu->isar.dbgdidr, DBGDIDR, CTX_CMPS) + 1; ++ return FIELD_EX32(cpu->isar.regs[DBGDIDR], DBGDIDR, CTX_CMPS) + 1; + } + } + +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 2a88b8df..06cf31e8 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -455,7 +455,7 @@ static inline void unset_feature(uint64_t *features, int feature) + *features &= ~(1ULL << feature); + } + +-static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) ++static int read_sys_reg32(int fd, uint64_t *pret, uint64_t id) + { + uint64_t ret; + struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret }; +@@ -509,7 +509,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + ahcf->target = init.target; + ahcf->dtb_compatible = "arm,arm-v8"; + +- err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, ++ err = read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64PFR0], + ARM64_SYS_REG(3, 0, 0, 4, 0)); + if (unlikely(err < 0)) { + /* +@@ -528,24 +528,24 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + * ??? Either of these sounds like too much effort just + * to work around running a modern host kernel. 
+ */ +- ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */ ++ ahcf->isar.regs[ID_AA64PFR0] = 0x00000011; /* EL1&0, AArch64 only */ + err = 0; + } else { +- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1, ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64PFR1], + ARM64_SYS_REG(3, 0, 0, 4, 1)); +- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0, ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64DFR0], + ARM64_SYS_REG(3, 0, 0, 5, 0)); +- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1, ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64DFR1], + ARM64_SYS_REG(3, 0, 0, 5, 1)); +- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0, ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64ISAR0], + ARM64_SYS_REG(3, 0, 0, 6, 0)); +- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1, ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64ISAR1], + ARM64_SYS_REG(3, 0, 0, 6, 1)); +- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0, ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64MMFR0], + ARM64_SYS_REG(3, 0, 0, 7, 0)); +- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1, ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64MMFR1], + ARM64_SYS_REG(3, 0, 0, 7, 1)); +- err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, ++ err |= read_sys_reg64(fdarray[2], &ahcf->isar.regs[ID_AA64MMFR2], + ARM64_SYS_REG(3, 0, 0, 7, 2)); + + /* +@@ -555,38 +555,38 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + * than skipping the reads and leaving 0, as we must avoid + * considering the values in every case. 
+ */ +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_DFR0], + ARM64_SYS_REG(3, 0, 0, 1, 2)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR0], + ARM64_SYS_REG(3, 0, 0, 1, 4)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR1], + ARM64_SYS_REG(3, 0, 0, 1, 5)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR2], + ARM64_SYS_REG(3, 0, 0, 1, 6)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR3], + ARM64_SYS_REG(3, 0, 0, 1, 7)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR0], + ARM64_SYS_REG(3, 0, 0, 2, 0)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR1], + ARM64_SYS_REG(3, 0, 0, 2, 1)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR2], + ARM64_SYS_REG(3, 0, 0, 2, 2)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR3], + ARM64_SYS_REG(3, 0, 0, 2, 3)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR4], + ARM64_SYS_REG(3, 0, 0, 2, 4)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_ISAR5], + ARM64_SYS_REG(3, 0, 0, 2, 5)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[ID_MMFR4], + ARM64_SYS_REG(3, 0, 0, 2, 6)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6, ++ err |= read_sys_reg32(fdarray[2], 
&ahcf->isar.regs[ID_ISAR6], + ARM64_SYS_REG(3, 0, 0, 2, 7)); + +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[MVFR0], + ARM64_SYS_REG(3, 0, 0, 3, 0)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[MVFR1], + ARM64_SYS_REG(3, 0, 0, 3, 1)); +- err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2, ++ err |= read_sys_reg32(fdarray[2], &ahcf->isar.regs[MVFR2], + ARM64_SYS_REG(3, 0, 0, 3, 2)); + + /* +@@ -599,14 +599,16 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does. + * We only do this if the CPU supports AArch32 at EL1. + */ +- if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) { +- int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS); +- int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS); ++ if (FIELD_EX32(ahcf->isar.regs[ID_AA64PFR0], ID_AA64PFR0, EL1) >= 2) { ++ int wrps = FIELD_EX64(ahcf->isar.regs[ID_AA64DFR0], ++ ID_AA64DFR0, WRPS); ++ int brps = FIELD_EX64(ahcf->isar.regs[ID_AA64DFR0], ++ ID_AA64DFR0, BRPS); + int ctx_cmps = +- FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS); ++ FIELD_EX64(ahcf->isar.regs[ID_AA64DFR0], ID_AA64DFR0, CTX_CMPS); + int version = 6; /* ARMv8 debug architecture */ + bool has_el3 = +- !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3); ++ !!FIELD_EX32(ahcf->isar.regs[ID_AA64PFR0], ID_AA64PFR0, EL3); + uint32_t dbgdidr = 0; + + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps); +@@ -616,7 +618,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3); + dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3); + dbgdidr |= (1 << 15); /* RES1 bit */ +- ahcf->isar.dbgdidr = dbgdidr; ++ ahcf->isar.regs[DBGDIDR] = dbgdidr; + } + } + +-- +2.25.1 + diff --git a/target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch 
b/target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch new file mode 100644 index 0000000000000000000000000000000000000000..41c67cf1b8024af9f48888ecd782e9927a2f166e --- /dev/null +++ b/target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch @@ -0,0 +1,152 @@ +From 860035652c7866b033762f6d90f81d5ddedf855c Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 22 Apr 2020 17:08:43 +0800 +Subject: [PATCH] target/arm/cpu: Add the kvm-no-adjvtime CPU property + +kvm-no-adjvtime is a KVM specific CPU property and a first of its +kind. To accommodate it we also add kvm_arm_add_vcpu_properties() +and a KVM specific CPU properties description to the CPU features +document. + +Signed-off-by: Andrew Jones +Message-id: 20200120101023.16030-7-drjones@redhat.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e9a2a959..cfda6cc5 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1748,6 +1748,11 @@ static void machvirt_init(MachineState *machine) + } + } + ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime", NULL)) { ++ object_property_set_bool(cpuobj, true, "kvm-no-adjvtime", NULL); ++ } ++ + if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { + object_property_set_bool(cpuobj, false, "pmu", NULL); + } +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 43a6ce91..a9d6977a 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -107,6 +107,7 @@ typedef struct { + bool claim_edge_triggered_timers; + bool smbios_old_sys_ver; + bool no_highmem_ecam; ++ bool kvm_no_adjvtime; + } VirtMachineClass; + + typedef struct { +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index bc3da9a3..39bbe7e2 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2441,6 +2441,7 @@ static void arm_max_initfn(Object *obj) + + if (kvm_enabled()) { + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + } else { + cortex_a15_initfn(obj); 
+ +@@ -2629,6 +2630,7 @@ static void arm_host_initfn(Object *obj) + ARMCPU *cpu = ARM_CPU(obj); + + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + arm_cpu_post_init(obj); + } + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index dbf44b92..b30ca7c9 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -312,6 +312,7 @@ static void aarch64_max_initfn(Object *obj) + + if (kvm_enabled()) { + kvm_arm_set_cpu_features_from_host(cpu); ++ kvm_arm_add_vcpu_properties(obj); + } else { + uint64_t t; + uint32_t u; +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 21fb7ecd..327b3bc3 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -16,6 +16,8 @@ + #include "qemu-common.h" + #include "qemu/timer.h" + #include "qemu/error-report.h" ++#include "qom/object.h" ++#include "qapi/error.h" + #include "sysemu/sysemu.h" + #include "sysemu/kvm.h" + #include "sysemu/kvm_int.h" +@@ -162,6 +164,32 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + env->features = arm_host_cpu_features.features; + } + ++static bool kvm_no_adjvtime_get(Object *obj, Error **errp) ++{ ++ return !ARM_CPU(obj)->kvm_adjvtime; ++} ++ ++static void kvm_no_adjvtime_set(Object *obj, bool value, Error **errp) ++{ ++ ARM_CPU(obj)->kvm_adjvtime = !value; ++} ++ ++/* KVM VCPU properties should be prefixed with "kvm-". */ ++void kvm_arm_add_vcpu_properties(Object *obj) ++{ ++ if (!kvm_enabled()) { ++ return; ++ } ++ ++ ARM_CPU(obj)->kvm_adjvtime = true; ++ object_property_add_bool(obj, "kvm-no-adjvtime", kvm_no_adjvtime_get, ++ kvm_no_adjvtime_set, &error_abort); ++ object_property_set_description(obj, "kvm-no-adjvtime", ++ "Set on to disable the adjustment of " ++ "the virtual counter. 
VM stopped time " ++ "will be counted.", &error_abort); ++} ++ + int kvm_arm_get_max_vm_ipa_size(MachineState *ms) + { + KVMState *s = KVM_STATE(ms->accelerator); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 97560d4e..0de5f83e 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -230,6 +230,15 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); + */ + void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + ++/** ++ * kvm_arm_add_vcpu_properties: ++ * @obj: The CPU object to add the properties to ++ * ++ * Add all KVM specific CPU properties to the CPU object. These ++ * are the CPU properties with "kvm-" prefixed names. ++ */ ++void kvm_arm_add_vcpu_properties(Object *obj); ++ + /** + * kvm_arm_get_max_vm_ipa_size: + * @ms: Machine state handle +@@ -294,6 +303,8 @@ static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + cpu->host_cpu_probe_failed = true; + } + ++static inline void kvm_arm_add_vcpu_properties(Object *obj) {} ++ + static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms) + { + return -ENOENT; +-- +2.23.0 diff --git a/target-arm-ignore-evtstrm-and-cpuid-CPU-features.patch b/target-arm-ignore-evtstrm-and-cpuid-CPU-features.patch new file mode 100644 index 0000000000000000000000000000000000000000..cf9bb73b8f5bf63c5e073042ca137266fd28e894 --- /dev/null +++ b/target-arm-ignore-evtstrm-and-cpuid-CPU-features.patch @@ -0,0 +1,66 @@ +From dfedc889fafd35efd4f8382b7672bf0e556f9f45 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Mon, 7 Sep 2020 14:07:07 +0800 +Subject: [PATCH] target/arm: ignore evtstrm and cpuid CPU features + +evtstrm and cpuid cannot be controlled by VMM: +1. evtstrm: The generic timer is configured to generate events at a + frequency of approximately 100KHz. It's controlled by the linux + kernel config CONFIG_ARM_ARCH_TIMER_EVTSTREAM. +2. cpuid: EL0 access to certain ID registers is available.
It's always + set by linux kernel after 77c97b4ee2129 ("arm64: cpufeature: Expose + CPUID registers by emulation"). +However, they are exposed by getauxval() and /proc/cpuinfo. Hence, +let's report and ignore the CPU features if someone set them. + +Signed-off-by: Peng Liang +--- + target/arm/cpu64.c | 29 ++++++++++++++++++++++++++++- + 1 file changed, 28 insertions(+), 1 deletion(-) + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 7de20848..726d123d 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -506,10 +506,37 @@ static void arm_cpu_parse_featurestr(const char *typename, char *features, + } + } + ++static const char *unconfigurable_feats[] = { ++ "evtstrm", ++ "cpuid", ++ NULL ++}; ++ ++static bool is_configurable_feat(const char *name) ++{ ++ int i; ++ ++ for (i = 0; unconfigurable_feats[i]; ++i) { ++ if (g_strcmp0(unconfigurable_feats[i], name) == 0) { ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static void + cpu_add_feat_as_prop(const char *typename, const char *name, const char *val) + { +- GlobalProperty *prop = g_new0(typeof(*prop), 1); ++ GlobalProperty *prop; ++ ++ if (!is_configurable_feat(name)) { ++ info_report("CPU feature '%s' is not configurable by QEMU. 
Ignore it.", ++ name); ++ return; ++ } ++ ++ prop = g_new0(typeof(*prop), 1); + prop->driver = typename; + prop->property = g_strdup(name); + prop->value = g_strdup(val); +-- +2.28.0 + diff --git a/target-arm-introduce-CPU-feature-dependency-mechanis.patch b/target-arm-introduce-CPU-feature-dependency-mechanis.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c47cba243d4890cfc205c6c9b5b04b37705664f --- /dev/null +++ b/target-arm-introduce-CPU-feature-dependency-mechanis.patch @@ -0,0 +1,184 @@ +From da538bb9d1acc22543a2b7b07ae35a62386bf226 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Thu, 6 Aug 2020 16:14:46 +0800 +Subject: [PATCH 5/9] target/arm: introduce CPU feature dependency mechanism + +Some CPU features are dependent on other CPU features. For example, +ID_AA64PFR0_EL1.FP field and ID_AA64PFR0_EL1.AdvSIMD must have the same +value, which means FP and ADVSIMD are dependent on each other, FPHP and +ADVSIMDHP are dependent on each other. + +This commit introduces a mechanism for CPU feature dependency in +AArch64. We build a directed graph from the CPU feature dependency +relationship, each edge from->to means the `to` CPU feature is dependent +on the `from` CPU feature. And we will automatically enable/disable CPU +feature according to the directed graph. + +For example, a, b, and c CPU features are in relationship a->b->c, which +means c is dependent on b and b is dependent on a. If c is enabled by +user, then a and b is enabled automatically. And if a is disabled by +user, then b and c is disabled automatically. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + target/arm/cpu.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 129 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 3f63312c..d5576538 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1306,6 +1306,103 @@ static struct CPUFeatureInfo cpu_features[] = { + }, + }; + ++typedef struct CPUFeatureDep { ++ CPUFeatureInfo from, to; ++} CPUFeatureDep; ++ ++static const CPUFeatureDep feature_dependencies[] = { ++ { ++ .from = FIELD_INFO("fp", ID_AA64PFR0, FP, true, 0, 0xf, false), ++ .to = FIELD_INFO("asimd", ID_AA64PFR0, ADVSIMD, true, 0, 0xf, false), ++ }, ++ { ++ .from = FIELD_INFO("asimd", ID_AA64PFR0, ADVSIMD, true, 0, 0xf, false), ++ .to = FIELD_INFO("fp", ID_AA64PFR0, FP, true, 0, 0xf, false), ++ }, ++ { ++ .from = { ++ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_FP_LENGTH, ++ .shift = R_ID_AA64PFR0_FP_SHIFT, .sign = true, .min_value = 1, ++ .ni_value = 0, .name = "fphp", .is_32bit = false, ++ }, ++ .to = { ++ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_ADVSIMD_LENGTH, ++ .shift = R_ID_AA64PFR0_ADVSIMD_SHIFT, .sign = true, .min_value = 1, ++ .ni_value = 0, .name = "asimdhp", .is_32bit = false, ++ }, ++ }, ++ { ++ .from = { ++ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_ADVSIMD_LENGTH, ++ .shift = R_ID_AA64PFR0_ADVSIMD_SHIFT, .sign = true, .min_value = 1, ++ .ni_value = 0, .name = "asimdhp", .is_32bit = false, ++ }, ++ .to = { ++ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_FP_LENGTH, ++ .shift = R_ID_AA64PFR0_FP_SHIFT, .sign = true, .min_value = 1, ++ .ni_value = 0, .name = "fphp", .is_32bit = false, ++ }, ++ }, ++ { ++ ++ .from = FIELD_INFO("aes", ID_AA64ISAR0, AES, false, 1, 0, false), ++ .to = { ++ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_AES_LENGTH, ++ .shift = R_ID_AA64ISAR0_AES_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "pmull", .is_32bit = false, ++ }, ++ }, ++ { ++ ++ .from = FIELD_INFO("sha2", ID_AA64ISAR0, 
SHA2, false, 1, 0, false), ++ .to = { ++ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_SHA2_LENGTH, ++ .shift = R_ID_AA64ISAR0_SHA2_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "sha512", .is_32bit = false, ++ }, ++ }, ++ { ++ .from = FIELD_INFO("lrcpc", ID_AA64ISAR1, LRCPC, false, 1, 0, false), ++ .to = { ++ .reg = ID_AA64ISAR1, .length = R_ID_AA64ISAR1_LRCPC_LENGTH, ++ .shift = R_ID_AA64ISAR1_LRCPC_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "ilrcpc", .is_32bit = false, ++ }, ++ }, ++ { ++ .from = FIELD_INFO("sm3", ID_AA64ISAR0, SM3, false, 1, 0, false), ++ .to = FIELD_INFO("sm4", ID_AA64ISAR0, SM4, false, 1, 0, false), ++ }, ++ { ++ .from = FIELD_INFO("sm4", ID_AA64ISAR0, SM4, false, 1, 0, false), ++ .to = FIELD_INFO("sm3", ID_AA64ISAR0, SM3, false, 1, 0, false), ++ }, ++ { ++ .from = FIELD_INFO("sha1", ID_AA64ISAR0, SHA1, false, 1, 0, false), ++ .to = FIELD_INFO("sha2", ID_AA64ISAR0, SHA2, false, 1, 0, false), ++ }, ++ { ++ .from = FIELD_INFO("sha1", ID_AA64ISAR0, SHA1, false, 1, 0, false), ++ .to = FIELD_INFO("sha3", ID_AA64ISAR0, SHA3, false, 1, 0, false), ++ }, ++ { ++ .from = FIELD_INFO("sha3", ID_AA64ISAR0, SHA3, false, 1, 0, false), ++ .to = { ++ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_SHA2_LENGTH, ++ .shift = R_ID_AA64ISAR0_SHA2_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "sha512", .is_32bit = false, ++ }, ++ }, ++ { ++ .from = { ++ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_SHA2_LENGTH, ++ .shift = R_ID_AA64ISAR0_SHA2_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "sha512", .is_32bit = false, ++ }, ++ .to = FIELD_INFO("sha3", ID_AA64ISAR0, SHA3, false, 1, 0, false), ++ }, ++}; ++ + static void arm_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { +@@ -1342,13 +1439,45 @@ static void arm_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, + } + + if (value) { ++ if (object_property_get_bool(obj, feat->name, NULL)) { ++ 
return; ++ } + isar->regs[feat->reg] = deposit64(isar->regs[feat->reg], + feat->shift, feat->length, + feat->min_value); ++ /* Auto enable the features which current feature is dependent on. */ ++ for (int i = 0; i < ARRAY_SIZE(feature_dependencies); ++i) { ++ const CPUFeatureDep *d = &feature_dependencies[i]; ++ if (strcmp(d->to.name, feat->name) != 0) { ++ continue; ++ } ++ ++ object_property_set_bool(obj, true, d->from.name, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } + } else { ++ if (!object_property_get_bool(obj, feat->name, NULL)) { ++ return; ++ } + isar->regs[feat->reg] = deposit64(isar->regs[feat->reg], + feat->shift, feat->length, + feat->ni_value); ++ /* Auto disable the features which are dependent on current feature. */ ++ for (int i = 0; i < ARRAY_SIZE(feature_dependencies); ++i) { ++ const CPUFeatureDep *d = &feature_dependencies[i]; ++ if (strcmp(d->from.name, feat->name) != 0) { ++ continue; ++ } ++ ++ object_property_set_bool(obj, false, d->to.name, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } + } + } + +-- +2.25.1 + diff --git a/target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch b/target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch new file mode 100644 index 0000000000000000000000000000000000000000..0477419196061a5e452363845ffd4591bfc5ef21 --- /dev/null +++ b/target-arm-introduce-KVM_CAP_ARM_CPU_FEATURE.patch @@ -0,0 +1,92 @@ +From 7ed595242f52d0654982d41a9c2a63be2bc3378e Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Thu, 6 Aug 2020 16:14:55 +0800 +Subject: [PATCH 6/9] target/arm: introduce KVM_CAP_ARM_CPU_FEATURE + +Introduce KVM_CAP_ARM_CPU_FEATURE to check whether KVM supports to set +CPU features in ARM. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + linux-headers/linux/kvm.h | 2 ++ + target/arm/cpu.c | 5 +++++ + target/arm/kvm64.c | 14 ++++++++++++++ + target/arm/kvm_arm.h | 7 +++++++ + 4 files changed, 28 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 744e888e..4844edc3 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -995,6 +995,8 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 + #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 + ++#define KVM_CAP_ARM_CPU_FEATURE 555 ++ + #ifdef KVM_CAP_IRQ_ROUTING + + struct kvm_irq_routing_irqchip { +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index d5576538..db46afba 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1427,6 +1427,11 @@ static void arm_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, + Error *local_err = NULL; + bool value; + ++ if (!kvm_arm_cpu_feature_supported()) { ++ warn_report("KVM doesn't support to set CPU feature in arm. 
" ++ "Setting to `%s` is ignored.", name); ++ return; ++ } + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 06cf31e8..05345556 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -644,6 +644,20 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + return true; + } + ++bool kvm_arm_cpu_feature_supported(void) ++{ ++ static bool cpu_feature_initialized; ++ static bool cpu_feature_supported; ++ ++ if (!cpu_feature_initialized) { ++ cpu_feature_supported = kvm_check_extension(kvm_state, ++ KVM_CAP_ARM_CPU_FEATURE); ++ cpu_feature_initialized = true; ++ } ++ ++ return cpu_feature_supported; ++} ++ + #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 + + int kvm_arch_init_vcpu(CPUState *cs) +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 9b7104d6..49e80878 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -239,6 +239,13 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + */ + void kvm_arm_add_vcpu_properties(Object *obj); + ++/** ++ * kvm_arm_cpu_feature_supported: ++ * ++ * Returns true if KVM can set CPU features and false otherwise. ++ */ ++bool kvm_arm_cpu_feature_supported(void); ++ + /** + * kvm_arm_get_max_vm_ipa_size: + * @ms: Machine state handle +-- +2.25.1 + diff --git a/target-arm-kvm-Implement-virtual-time-adjustment.patch b/target-arm-kvm-Implement-virtual-time-adjustment.patch new file mode 100644 index 0000000000000000000000000000000000000000..86450c4d8f1739527a7065ec9242706605487b0e --- /dev/null +++ b/target-arm-kvm-Implement-virtual-time-adjustment.patch @@ -0,0 +1,290 @@ +From 77ee224418fac859acecd9aca4d18555ced42db6 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Tue, 21 Apr 2020 17:32:31 +0800 +Subject: [PATCH 3/4] target/arm/kvm: Implement virtual time adjustment + +When a VM is stopped (such as when it's paused) guest virtual time +should stop counting. 
Otherwise, when the VM is resumed it will +experience time jumps and its kernel may report soft lockups. Not +counting virtual time while the VM is stopped has the side effect +of making the guest's time appear to lag when compared with real +time, and even with time derived from the physical counter. For +this reason, this change, which is enabled by default, comes with +a KVM CPU feature allowing it to be disabled, restoring legacy +behavior. + +This patch only provides the implementation of the virtual time +adjustment. A subsequent patch will provide the CPU property +allowing the change to be enabled and disabled. + +Reported-by: Bijan Mottahedeh +Signed-off-by: Andrew Jones +Message-id: 20200120101023.16030-6-drjones@redhat.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +--- + target/arm/cpu.h | 7 ++++ + target/arm/kvm.c | 92 ++++++++++++++++++++++++++++++++++++++++++++ + target/arm/kvm32.c | 2 + + target/arm/kvm64.c | 2 + + target/arm/kvm_arm.h | 37 ++++++++++++++++++ + target/arm/machine.c | 7 ++++ + 6 files changed, 147 insertions(+) + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 94c990cd..e19531a7 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -816,6 +816,13 @@ struct ARMCPU { + /* KVM init features for this CPU */ + uint32_t kvm_init_features[7]; + ++ /* KVM CPU state */ ++ ++ /* KVM virtual time adjustment */ ++ bool kvm_adjvtime; ++ bool kvm_vtime_dirty; ++ uint64_t kvm_vtime; ++ + /* Uniprocessor system with MP extensions */ + bool mp_is_up; + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index cc7a46df..21fb7ecd 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -336,6 +336,22 @@ static int compare_u64(const void *a, const void *b) + return 0; + } + ++/* ++ * cpreg_values are sorted in ascending order by KVM register ID ++ * (see kvm_arm_init_cpreg_list). This allows us to cheaply find ++ * the storage for a KVM register by ID with a binary search. 
++ */ ++static uint64_t *kvm_arm_get_cpreg_ptr(ARMCPU *cpu, uint64_t regidx) ++{ ++ uint64_t *res; ++ ++ res = bsearch(&regidx, cpu->cpreg_indexes, cpu->cpreg_array_len, ++ sizeof(uint64_t), compare_u64); ++ assert(res); ++ ++ return &cpu->cpreg_values[res - cpu->cpreg_indexes]; ++} ++ + /* Initialize the ARMCPU cpreg list according to the kernel's + * definition of what CPU registers it knows about (and throw away + * the previous TCG-created cpreg list). +@@ -489,6 +505,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level) + return ok; + } + ++void kvm_arm_cpu_pre_save(ARMCPU *cpu) ++{ ++ /* KVM virtual time adjustment */ ++ if (cpu->kvm_vtime_dirty) { ++ *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT) = cpu->kvm_vtime; ++ } ++} ++ ++void kvm_arm_cpu_post_load(ARMCPU *cpu) ++{ ++ /* KVM virtual time adjustment */ ++ if (cpu->kvm_adjvtime) { ++ cpu->kvm_vtime = *kvm_arm_get_cpreg_ptr(cpu, KVM_REG_ARM_TIMER_CNT); ++ cpu->kvm_vtime_dirty = true; ++ } ++} ++ + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; +@@ -556,6 +589,50 @@ int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) + return 0; + } + ++void kvm_arm_get_virtual_time(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ struct kvm_one_reg reg = { ++ .id = KVM_REG_ARM_TIMER_CNT, ++ .addr = (uintptr_t)&cpu->kvm_vtime, ++ }; ++ int ret; ++ ++ if (cpu->kvm_vtime_dirty) { ++ return; ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); ++ if (ret) { ++ error_report("Failed to get KVM_REG_ARM_TIMER_CNT"); ++ abort(); ++ } ++ ++ cpu->kvm_vtime_dirty = true; ++} ++ ++void kvm_arm_put_virtual_time(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ struct kvm_one_reg reg = { ++ .id = KVM_REG_ARM_TIMER_CNT, ++ .addr = (uintptr_t)&cpu->kvm_vtime, ++ }; ++ int ret; ++ ++ if (!cpu->kvm_vtime_dirty) { ++ return; ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); ++ if (ret) { ++ error_report("Failed to set KVM_REG_ARM_TIMER_CNT"); ++ abort(); ++ } ++ ++ cpu->kvm_vtime_dirty = false; ++} ++ + int
kvm_put_vcpu_events(ARMCPU *cpu) + { + CPUARMState *env = &cpu->env; +@@ -667,6 +744,21 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) + return MEMTXATTRS_UNSPECIFIED; + } + ++void kvm_arm_vm_state_change(void *opaque, int running, RunState state) ++{ ++ CPUState *cs = opaque; ++ ARMCPU *cpu = ARM_CPU(cs); ++ ++ if (running) { ++ if (cpu->kvm_adjvtime) { ++ kvm_arm_put_virtual_time(cs); ++ } ++ } else { ++ if (cpu->kvm_adjvtime) { ++ kvm_arm_get_virtual_time(cs); ++ } ++ } ++} + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { +diff --git a/target/arm/kvm32.c b/target/arm/kvm32.c +index 51f78f72..ee158830 100644 +--- a/target/arm/kvm32.c ++++ b/target/arm/kvm32.c +@@ -195,6 +195,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + ++ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index f2f0a92e..4f0bf000 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -609,6 +609,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + ++ qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); + if (cpu->start_powered_off) { +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 32d97ce5..97560d4e 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -113,6 +113,23 @@ bool write_list_to_kvmstate(ARMCPU *cpu, int level); + */ + bool write_kvmstate_to_list(ARMCPU *cpu); + ++/** ++ * kvm_arm_cpu_pre_save: ++ * @cpu: ARMCPU ++ * ++ * Called after write_kvmstate_to_list() from cpu_pre_save() to update ++ * the cpreg list with KVM CPU state. 
++ */ ++void kvm_arm_cpu_pre_save(ARMCPU *cpu); ++ ++/** ++ * kvm_arm_cpu_post_load: ++ * @cpu: ARMCPU ++ * ++ * Called from cpu_post_load() to update KVM CPU state from the cpreg list. ++ */ ++void kvm_arm_cpu_post_load(ARMCPU *cpu); ++ + /** + * kvm_arm_reset_vcpu: + * @cpu: ARMCPU +@@ -241,6 +258,24 @@ int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + */ + int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + ++/** ++ * kvm_arm_get_virtual_time: ++ * @cs: CPUState ++ * ++ * Gets the VCPU's virtual counter and stores it in the KVM CPU state. ++ */ ++void kvm_arm_get_virtual_time(CPUState *cs); ++ ++/** ++ * kvm_arm_put_virtual_time: ++ * @cs: CPUState ++ * ++ * Sets the VCPU's virtual counter to the value stored in the KVM CPU state. ++ */ ++void kvm_arm_put_virtual_time(CPUState *cs); ++ ++void kvm_arm_vm_state_change(void *opaque, int running, RunState state); ++ + int kvm_arm_vgic_probe(void); + + void kvm_arm_pmu_set_irq(CPUState *cs, int irq); +@@ -272,6 +307,8 @@ static inline int kvm_arm_vgic_probe(void) + static inline void kvm_arm_pmu_set_irq(CPUState *cs, int irq) {} + static inline void kvm_arm_pmu_init(CPUState *cs) {} + ++static inline void kvm_arm_get_virtual_time(CPUState *cs) {} ++static inline void kvm_arm_put_virtual_time(CPUState *cs) {} + #endif + + static inline const char *gic_class_name(void) +diff --git a/target/arm/machine.c b/target/arm/machine.c +index 3fd319a3..ee3c59a6 100644 +--- a/target/arm/machine.c ++++ b/target/arm/machine.c +@@ -644,6 +644,12 @@ static int cpu_pre_save(void *opaque) + /* This should never fail */ + abort(); + } ++ ++ /* ++ * kvm_arm_cpu_pre_save() must be called after ++ * write_kvmstate_to_list() ++ */ ++ kvm_arm_cpu_pre_save(cpu); + } else { + if (!write_cpustate_to_list(cpu, false)) { + /* This should never fail. */ +@@ -746,6 +752,7 @@ static int cpu_post_load(void *opaque, int version_id) + * we're using it. 
+ */ + write_list_to_cpustate(cpu); ++ kvm_arm_cpu_post_load(cpu); + } else { + if (!write_list_to_cpustate(cpu)) { + return -1; +-- +2.23.0 diff --git a/target-arm-kvm-trivial-Clean-up-header-documentation.patch b/target-arm-kvm-trivial-Clean-up-header-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c28c63b1e9fa89ace5860f635f07e2d9b221bbe --- /dev/null +++ b/target-arm-kvm-trivial-Clean-up-header-documentation.patch @@ -0,0 +1,144 @@ +From c057499f90af4be8b26f57f8755aca0ddfcf9467 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Tue, 21 Apr 2020 16:52:07 +0800 +Subject: [PATCH 1/4] target/arm/kvm: trivial: Clean up header documentation + +Signed-off-by: Andrew Jones +Message-id: 20200120101023.16030-2-drjones@redhat.com +Reviewed-by: Peter Maydell +Signed-off-by: Peter Maydell +--- + target/arm/kvm_arm.h | 38 +++++++++++++++++++++++--------------- + 1 file changed, 23 insertions(+), 15 deletions(-) + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index a9f3ccab..32d97ce5 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -61,8 +61,8 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, + int kvm_arm_init_cpreg_list(ARMCPU *cpu); + + /** +- * kvm_arm_reg_syncs_via_cpreg_list +- * regidx: KVM register index ++ * kvm_arm_reg_syncs_via_cpreg_list: ++ * @regidx: KVM register index + * + * Return true if this KVM register should be synchronized via the + * cpreg list of arbitrary system registers, false if it is synchronized +@@ -71,8 +71,8 @@ int kvm_arm_init_cpreg_list(ARMCPU *cpu); + bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx); + + /** +- * kvm_arm_cpreg_level +- * regidx: KVM register index ++ * kvm_arm_cpreg_level: ++ * @regidx: KVM register index + * + * Return the level of this coprocessor/system register. Return value is + * either KVM_PUT_RUNTIME_STATE, KVM_PUT_RESET_STATE, or KVM_PUT_FULL_STATE. 
+@@ -134,6 +134,8 @@ void kvm_arm_init_serror_injection(CPUState *cs); + * @cpu: ARMCPU + * + * Get VCPU related state from kvm. ++ * ++ * Returns: 0 if success else < 0 error code + */ + int kvm_get_vcpu_events(ARMCPU *cpu); + +@@ -142,6 +144,8 @@ int kvm_get_vcpu_events(ARMCPU *cpu); + * @cpu: ARMCPU + * + * Put VCPU related state to kvm. ++ * ++ * Returns: 0 if success else < 0 error code + */ + int kvm_put_vcpu_events(ARMCPU *cpu); + +@@ -191,10 +195,12 @@ typedef struct ARMHostCPUFeatures { + + /** + * kvm_arm_get_host_cpu_features: +- * @ahcc: ARMHostCPUClass to fill in ++ * @ahcf: ARMHostCPUClass to fill in + * + * Probe the capabilities of the host kernel's preferred CPU and fill + * in the ARMHostCPUClass struct accordingly. ++ * ++ * Returns true on success and false otherwise. + */ + bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); + +@@ -208,26 +214,30 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf); + void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu); + + /** +- * kvm_arm_get_max_vm_ipa_size - Returns the number of bits in the +- * IPA address space supported by KVM +- * ++ * kvm_arm_get_max_vm_ipa_size: + * @ms: Machine state handle ++ * ++ * Returns the number of bits in the IPA address space supported by KVM + */ + int kvm_arm_get_max_vm_ipa_size(MachineState *ms); + + /** +- * kvm_arm_sync_mpstate_to_kvm ++ * kvm_arm_sync_mpstate_to_kvm: + * @cpu: ARMCPU + * + * If supported set the KVM MP_STATE based on QEMU's model. ++ * ++ * Returns 0 on success and -1 on failure. + */ + int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + + /** +- * kvm_arm_sync_mpstate_to_qemu ++ * kvm_arm_sync_mpstate_to_qemu: + * @cpu: ARMCPU + * + * If supported get the MP_STATE from KVM and store in QEMU's model. ++ * ++ * Returns 0 on success and aborts on failure. 
+ */ + int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + +@@ -241,7 +251,8 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + + static inline void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu) + { +- /* This should never actually be called in the "not KVM" case, ++ /* ++ * This should never actually be called in the "not KVM" case, + * but set up the fields to indicate an error anyway. + */ + cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE; +@@ -310,23 +321,20 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit); + * + * Return: TRUE if any hardware breakpoints in use. + */ +- + bool kvm_arm_hw_debug_active(CPUState *cs); + + /** + * kvm_arm_copy_hw_debug_data: +- * + * @ptr: kvm_guest_debug_arch structure + * + * Copy the architecture specific debug registers into the + * kvm_guest_debug ioctl structure. + */ + struct kvm_guest_debug_arch; +- + void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); + + /** +- * its_class_name ++ * its_class_name: + * + * Return the ITS class name to use depending on whether KVM acceleration + * and KVM CAP_SIGNAL_MSI are supported +-- +2.23.0 diff --git a/target-arm-kvm64-kvm64-cpus-have-timer-registers.patch b/target-arm-kvm64-kvm64-cpus-have-timer-registers.patch new file mode 100644 index 0000000000000000000000000000000000000000..b8cec1bd36e2da9526a643229252ac6760eebecf --- /dev/null +++ b/target-arm-kvm64-kvm64-cpus-have-timer-registers.patch @@ -0,0 +1,37 @@ +From 07bd62920f968da7d1d8962cc7fd3d29652d25f4 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Tue, 21 Apr 2020 17:04:13 +0800 +Subject: [PATCH 2/4] target/arm/kvm64: kvm64 cpus have timer registers + +Add the missing GENERIC_TIMER feature to kvm64 cpus. + +We don't currently use these registers when KVM is enabled, but it's +probably best we add the feature flag for consistency and potential +future use. 
There's also precedent, as we add the PMU feature flag to +KVM enabled guests, even though we don't use those registers either. + +This change was originally posted as a hunk of a different, never +merged patch from Bijan Mottahedeh. + +Signed-off-by: Andrew Jones +Reviewed-by: Richard Henderson +Message-id: 20200120101023.16030-4-drjones@redhat.com +Signed-off-by: Peter Maydell +--- + target/arm/kvm64.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 22d19c9a..f2f0a92e 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -587,6 +587,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) + set_feature(&features, ARM_FEATURE_NEON); + set_feature(&features, ARM_FEATURE_AARCH64); + set_feature(&features, ARM_FEATURE_PMU); ++ set_feature(&features, ARM_FEATURE_GENERIC_TIMER); + + ahcf->features = features; + +-- +2.23.0 diff --git a/target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch b/target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch new file mode 100644 index 0000000000000000000000000000000000000000..30f14bafcc5b70310e462e9b4f5ca5cb91708cef --- /dev/null +++ b/target-arm-monitor-Introduce-qmp_query_cpu_model_exp.patch @@ -0,0 +1,373 @@ +From c527fa45dd0bb03c7f35b79ff53f127297f96314 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Thu, 31 Oct 2019 15:27:26 +0100 +Subject: [PATCH 12/13] target/arm/monitor: Introduce + qmp_query_cpu_model_expansion + +Add support for the query-cpu-model-expansion QMP command to Arm. We +do this selectively, only exposing CPU properties which represent +optional CPU features which the user may want to enable/disable. +Additionally we restrict the list of queryable cpu models to 'max', +'host', or the current type when KVM is in use. And, finally, we only +implement expansion type 'full', as Arm does not yet have a "base" +CPU type. More details and example queries are described in a new +document (docs/arm-cpu-features.rst). 
+ +Note, certainly more features may be added to the list of advertised +features, e.g. 'vfp' and 'neon'. The only requirement is that we can +detect invalid configurations and emit failures at QMP query time. +For 'vfp' and 'neon' this will require some refactoring to share a +validation function between the QMP query and the CPU realize +functions. + +Signed-off-by: Andrew Jones +Reviewed-by: Richard Henderson +Reviewed-by: Eric Auger +Reviewed-by: Beata Michalska +Message-id: 20191031142734.8590-2-drjones@redhat.com +Signed-off-by: Peter Maydell +--- + docs/arm-cpu-features.rst | 137 +++++++++++++++++++++++++++++++++++ + qapi/machine-target.json | 6 +- + target/arm/monitor.c | 145 ++++++++++++++++++++++++++++++++++++++ + 3 files changed, 285 insertions(+), 3 deletions(-) + create mode 100644 docs/arm-cpu-features.rst + +diff --git a/docs/arm-cpu-features.rst b/docs/arm-cpu-features.rst +new file mode 100644 +index 00000000..c79dcffb +--- /dev/null ++++ b/docs/arm-cpu-features.rst +@@ -0,0 +1,137 @@ ++================ ++ARM CPU Features ++================ ++ ++Examples of probing and using ARM CPU features ++ ++Introduction ++============ ++ ++CPU features are optional features that a CPU of supporting type may ++choose to implement or not. In QEMU, optional CPU features have ++corresponding boolean CPU properties that, when enabled, indicate ++that the feature is implemented, and, conversely, when disabled, ++indicate that it is not implemented. An example of an ARM CPU feature ++is the Performance Monitoring Unit (PMU). CPU types such as the ++Cortex-A15 and the Cortex-A57, which respectively implement ARM ++architecture reference manuals ARMv7-A and ARMv8-A, may both optionally ++implement PMUs. For example, if a user wants to use a Cortex-A15 without ++a PMU, then the `-cpu` parameter should contain `pmu=off` on the QEMU ++command line, i.e. `-cpu cortex-a15,pmu=off`.
++ ++As not all CPU types support all optional CPU features, then whether or ++not a CPU property exists depends on the CPU type. For example, CPUs ++that implement the ARMv8-A architecture reference manual may optionally ++support the AArch32 CPU feature, which may be enabled by disabling the ++`aarch64` CPU property. A CPU type such as the Cortex-A15, which does ++not implement ARMv8-A, will not have the `aarch64` CPU property. ++ ++QEMU's support may be limited for some CPU features, only partially ++supporting the feature or only supporting the feature under certain ++configurations. For example, the `aarch64` CPU feature, which, when ++disabled, enables the optional AArch32 CPU feature, is only supported ++when using the KVM accelerator and when running on a host CPU type that ++supports the feature. ++ ++CPU Feature Probing ++=================== ++ ++Determining which CPU features are available and functional for a given ++CPU type is possible with the `query-cpu-model-expansion` QMP command. ++Below are some examples where `scripts/qmp/qmp-shell` (see the top comment ++block in the script for usage) is used to issue the QMP commands. ++ ++(1) Determine which CPU features are available for the `max` CPU type ++ (Note, we started QEMU with qemu-system-aarch64, so `max` is ++ implementing the ARMv8-A reference manual in this case):: ++ ++ (QEMU) query-cpu-model-expansion type=full model={"name":"max"} ++ { "return": { ++ "model": { "name": "max", "props": { ++ "pmu": true, "aarch64": true ++ }}}} ++ ++We see that the `max` CPU type has the `pmu` and `aarch64` CPU features. ++We also see that the CPU features are enabled, as they are all `true`. ++ ++(2) Let's try to disable the PMU:: ++ ++ (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"pmu":false}} ++ { "return": { ++ "model": { "name": "max", "props": { ++ "pmu": false, "aarch64": true ++ }}}} ++ ++We see it worked, as `pmu` is now `false`. 
++ ++(3) Let's try to disable `aarch64`, which enables the AArch32 CPU feature:: ++ ++ (QEMU) query-cpu-model-expansion type=full model={"name":"max","props":{"aarch64":false}} ++ {"error": { ++ "class": "GenericError", "desc": ++ "'aarch64' feature cannot be disabled unless KVM is enabled and 32-bit EL1 is supported" ++ }} ++ ++It looks like this feature is limited to a configuration we do not ++currently have. ++ ++(4) Let's try probing CPU features for the Cortex-A15 CPU type:: ++ ++ (QEMU) query-cpu-model-expansion type=full model={"name":"cortex-a15"} ++ {"return": {"model": {"name": "cortex-a15", "props": {"pmu": true}}}} ++ ++Only the `pmu` CPU feature is available. ++ ++A note about CPU feature dependencies ++------------------------------------- ++ ++It's possible for features to have dependencies on other features. I.e. ++it may be possible to change one feature at a time without error, but ++when attempting to change all features at once an error could occur ++depending on the order they are processed. It's also possible changing ++all at once doesn't generate an error, because a feature's dependencies ++are satisfied with other features, but the same feature cannot be changed ++independently without error. For these reasons callers should always ++attempt to make their desired changes all at once in order to ensure the ++collection is valid. ++ ++A note about CPU models and KVM ++------------------------------- ++ ++Named CPU models generally do not work with KVM. There are a few cases ++that do work, e.g. using the named CPU model `cortex-a57` with KVM on a ++seattle host, but mostly if KVM is enabled the `host` CPU type must be ++used. This means the guest is provided all the same CPU features as the ++host CPU type has. And, for this reason, the `host` CPU type should ++enable all CPU features that the host has by default. 
Indeed it's even ++a bit strange to allow disabling CPU features that the host has when using ++the `host` CPU type, but in the absence of CPU models it's the best we can ++do if we want to launch guests without all the host's CPU features enabled. ++ ++Enabling KVM also affects the `query-cpu-model-expansion` QMP command. The ++effect is not only limited to specific features, as pointed out in example ++(3) of "CPU Feature Probing", but also to which CPU types may be expanded. ++When KVM is enabled, only the `max`, `host`, and current CPU type may be ++expanded. This restriction is necessary as it's not possible to know all ++CPU types that may work with KVM, but it does impose a small risk of users ++experiencing unexpected errors. For example on a seattle, as mentioned ++above, the `cortex-a57` CPU type is also valid when KVM is enabled. ++Therefore a user could use the `host` CPU type for the current type, but ++then attempt to query `cortex-a57`, however that query will fail with our ++restrictions. This shouldn't be an issue though as management layers and ++users have been preferring the `host` CPU type for use with KVM for quite ++some time. Additionally, if the KVM-enabled QEMU instance running on a ++seattle host is using the `cortex-a57` CPU type, then querying `cortex-a57` ++will work. ++ ++Using CPU Features ++================== ++ ++After determining which CPU features are available and supported for a ++given CPU type, then they may be selectively enabled or disabled on the ++QEMU command line with that CPU type:: ++ ++ $ qemu-system-aarch64 -M virt -cpu max,pmu=off ++ ++The example above disables the PMU for the `max` CPU type.
++ +diff --git a/qapi/machine-target.json b/qapi/machine-target.json +index 55310a6a..04623224 100644 +--- a/qapi/machine-target.json ++++ b/qapi/machine-target.json +@@ -212,7 +212,7 @@ + ## + { 'struct': 'CpuModelExpansionInfo', + 'data': { 'model': 'CpuModelInfo' }, +- 'if': 'defined(TARGET_S390X) || defined(TARGET_I386)' } ++ 'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' } + + ## + # @query-cpu-model-expansion: +@@ -237,7 +237,7 @@ + # query-cpu-model-expansion while using these is not advised. + # + # Some architectures may not support all expansion types. s390x supports +-# "full" and "static". ++# "full" and "static". Arm only supports "full". + # + # Returns: a CpuModelExpansionInfo. Returns an error if expanding CPU models is + # not supported, if the model cannot be expanded, if the model contains +@@ -251,7 +251,7 @@ + 'data': { 'type': 'CpuModelExpansionType', + 'model': 'CpuModelInfo' }, + 'returns': 'CpuModelExpansionInfo', +- 'if': 'defined(TARGET_S390X) || defined(TARGET_I386)' } ++ 'if': 'defined(TARGET_S390X) || defined(TARGET_I386) || defined(TARGET_ARM)' } + + ## + # @CpuDefinitionInfo: +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index 6ec6dd04..560970de 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -23,7 +23,14 @@ + #include "qemu/osdep.h" + #include "hw/boards.h" + #include "kvm_arm.h" ++#include "qapi/error.h" ++#include "qapi/visitor.h" ++#include "qapi/qobject-input-visitor.h" ++#include "qapi/qapi-commands-machine-target.h" + #include "qapi/qapi-commands-misc-target.h" ++#include "qapi/qmp/qerror.h" ++#include "qapi/qmp/qdict.h" ++#include "qom/qom-qobject.h" + + static GICCapability *gic_cap_new(int version) + { +@@ -82,3 +89,141 @@ GICCapabilityList *qmp_query_gic_capabilities(Error **errp) + + return head; + } ++ ++/* ++ * These are cpu model features we want to advertise. 
The order here ++ * matters as this is the order in which qmp_query_cpu_model_expansion ++ * will attempt to set them. If there are dependencies between features, ++ * then the order that considers those dependencies must be used. ++ */ ++static const char *cpu_model_advertised_features[] = { ++ "aarch64", "pmu", ++ NULL ++}; ++ ++CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, ++ CpuModelInfo *model, ++ Error **errp) ++{ ++ CpuModelExpansionInfo *expansion_info; ++ const QDict *qdict_in = NULL; ++ QDict *qdict_out; ++ ObjectClass *oc; ++ Object *obj; ++ const char *name; ++ int i; ++ ++ if (type != CPU_MODEL_EXPANSION_TYPE_FULL) { ++ error_setg(errp, "The requested expansion type is not supported"); ++ return NULL; ++ } ++ ++ if (!kvm_enabled() && !strcmp(model->name, "host")) { ++ error_setg(errp, "The CPU type '%s' requires KVM", model->name); ++ return NULL; ++ } ++ ++ oc = cpu_class_by_name(TYPE_ARM_CPU, model->name); ++ if (!oc) { ++ error_setg(errp, "The CPU type '%s' is not a recognized ARM CPU type", ++ model->name); ++ return NULL; ++ } ++ ++ if (kvm_enabled()) { ++ const char *cpu_type = current_machine->cpu_type; ++ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); ++ bool supported = false; ++ ++ if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { ++ /* These are kvmarm's recommended cpu types */ ++ supported = true; ++ } else if (strlen(model->name) == len && ++ !strncmp(model->name, cpu_type, len)) { ++ /* KVM is enabled and we're using this type, so it works. 
*/ ++ supported = true; ++ } ++ if (!supported) { ++ error_setg(errp, "We cannot guarantee the CPU type '%s' works " ++ "with KVM on this host", model->name); ++ return NULL; ++ } ++ } ++ ++ if (model->props) { ++ qdict_in = qobject_to(QDict, model->props); ++ if (!qdict_in) { ++ error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict"); ++ return NULL; ++ } ++ } ++ ++ obj = object_new(object_class_get_name(oc)); ++ ++ if (qdict_in) { ++ Visitor *visitor; ++ Error *err = NULL; ++ ++ visitor = qobject_input_visitor_new(model->props); ++ visit_start_struct(visitor, NULL, NULL, 0, &err); ++ if (err) { ++ visit_free(visitor); ++ object_unref(obj); ++ error_propagate(errp, err); ++ return NULL; ++ } ++ ++ i = 0; ++ while ((name = cpu_model_advertised_features[i++]) != NULL) { ++ if (qdict_get(qdict_in, name)) { ++ object_property_set(obj, visitor, name, &err); ++ if (err) { ++ break; ++ } ++ } ++ } ++ ++ if (!err) { ++ visit_check_struct(visitor, &err); ++ } ++ visit_end_struct(visitor, NULL); ++ visit_free(visitor); ++ if (err) { ++ object_unref(obj); ++ error_propagate(errp, err); ++ return NULL; ++ } ++ } ++ ++ expansion_info = g_new0(CpuModelExpansionInfo, 1); ++ expansion_info->model = g_malloc0(sizeof(*expansion_info->model)); ++ expansion_info->model->name = g_strdup(model->name); ++ ++ qdict_out = qdict_new(); ++ ++ i = 0; ++ while ((name = cpu_model_advertised_features[i++]) != NULL) { ++ ObjectProperty *prop = object_property_find(obj, name, NULL); ++ if (prop) { ++ Error *err = NULL; ++ QObject *value; ++ ++ assert(prop->get); ++ value = object_property_get_qobject(obj, name, &err); ++ assert(!err); ++ ++ qdict_put_obj(qdict_out, name, value); ++ } ++ } ++ ++ if (!qdict_size(qdict_out)) { ++ qobject_unref(qdict_out); ++ } else { ++ expansion_info->model->props = QOBJECT(qdict_out); ++ expansion_info->model->has_props = true; ++ } ++ ++ object_unref(obj); ++ ++ return expansion_info; ++} +-- +2.25.1 + diff --git 
a/target-arm-monitor-query-cpu-model-expansion-crashed.patch b/target-arm-monitor-query-cpu-model-expansion-crashed.patch new file mode 100644 index 0000000000000000000000000000000000000000..60973a7c2233a8e57f23d89c69a0c3a972835e8b --- /dev/null +++ b/target-arm-monitor-query-cpu-model-expansion-crashed.patch @@ -0,0 +1,59 @@ +From 5d75b922480f3fbefe83b5bb5e241e56a16e1e3e Mon Sep 17 00:00:00 2001 +From: Liang Yan +Date: Fri, 7 Feb 2020 14:04:21 +0000 +Subject: [PATCH 13/13] target/arm/monitor: query-cpu-model-expansion crashed + qemu when using machine type none + +Commit e19afd566781 mentioned that target-arm only supports queryable +cpu models 'max', 'host', and the current type when KVM is in use. +The logic works well until using machine type none. + +For machine type none, cpu_type will be null if cpu option is not +set by command line, strlen(cpu_type) will terminate process. +So We add a check above it. + +This won't affect i386 and s390x since they do not use current_cpu. + +Signed-off-by: Liang Yan +Message-id: 20200203134251.12986-1-lyan@suse.com +Reviewed-by: Andrew Jones +Tested-by: Andrew Jones +Signed-off-by: Peter Maydell +--- + target/arm/monitor.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/target/arm/monitor.c b/target/arm/monitor.c +index 560970de..e2b1d117 100644 +--- a/target/arm/monitor.c ++++ b/target/arm/monitor.c +@@ -131,17 +131,20 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + } + + if (kvm_enabled()) { +- const char *cpu_type = current_machine->cpu_type; +- int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); + bool supported = false; + + if (!strcmp(model->name, "host") || !strcmp(model->name, "max")) { + /* These are kvmarm's recommended cpu types */ + supported = true; +- } else if (strlen(model->name) == len && +- !strncmp(model->name, cpu_type, len)) { +- /* KVM is enabled and we're using this type, so it works. 
*/ +- supported = true; ++ } else if (current_machine->cpu_type) { ++ const char *cpu_type = current_machine->cpu_type; ++ int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX); ++ ++ if (strlen(model->name) == len && ++ !strncmp(model->name, cpu_type, len)) { ++ /* KVM is enabled and we're using this type, so it works. */ ++ supported = true; ++ } + } + if (!supported) { + error_setg(errp, "We cannot guarantee the CPU type '%s' works " +-- +2.25.1 + diff --git a/target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch b/target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch new file mode 100644 index 0000000000000000000000000000000000000000..91702dbe0cbcc5fff7a113ff2b75a90026521500 --- /dev/null +++ b/target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch @@ -0,0 +1,31 @@ +From 88e3146118230de8b99280db219a6a6c47bebce1 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Wed, 16 Sep 2020 19:40:28 +0800 +Subject: [PATCH] target/arm: only set ID_PFR1_EL1.GIC for AArch32 guest + +Some AArch64 CPU doesn't support AArch32 mode, and the values of AArch32 +registers are all 0. Hence, We'd better not to modify AArch32 registers +in AArch64 mode. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + target/arm/helper.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 97b6b86197..b262f5d6c5 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -5672,7 +5672,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri) + ARMCPU *cpu = env_archcpu(env); + uint64_t pfr1 = cpu->id_pfr1; + +- if (env->gicv3state) { ++ if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && env->gicv3state) { + pfr1 |= 1 << 28; + } + return pfr1; +-- +2.23.0 + diff --git a/target-arm-parse-cpu-feature-related-options.patch b/target-arm-parse-cpu-feature-related-options.patch new file mode 100644 index 0000000000000000000000000000000000000000..066e231af6266d21eecaf5d0c519b8a6aa4069d6 --- /dev/null +++ b/target-arm-parse-cpu-feature-related-options.patch @@ -0,0 +1,124 @@ +From dca1df05ce3d6b17d03203fc6fd94e23548216c7 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Thu, 6 Aug 2020 16:14:35 +0800 +Subject: [PATCH 2/9] target/arm: parse cpu feature related options + +The implementation of CPUClass::parse_features only supports CPU +features in "feature=value" format. However, libvirt maybe send us a +CPU feature string in "+feature/-feature" format. Hence, we need to +override CPUClass::parse_features to support CPU feature string in both +"feature=value" and "+feature/-feature" format. + +The logic of AArch64CPUClass::parse_features is similar to that of +X86CPUClass::parse_features. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + target/arm/cpu64.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 83 insertions(+) + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index fe648752..7de20848 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -506,6 +506,88 @@ static void arm_cpu_parse_featurestr(const char *typename, char *features, + } + } + ++static void ++cpu_add_feat_as_prop(const char *typename, const char *name, const char *val) ++{ ++ GlobalProperty *prop = g_new0(typeof(*prop), 1); ++ prop->driver = typename; ++ prop->property = g_strdup(name); ++ prop->value = g_strdup(val); ++ qdev_prop_register_global(prop); ++} ++ ++static gint compare_string(gconstpointer a, gconstpointer b) ++{ ++ return g_strcmp0(a, b); ++} ++ ++static GList *plus_features, *minus_features; ++ ++static void aarch64_cpu_parse_features(const char *typename, char *features, ++ Error **errp) ++{ ++ GList *l; ++ char *featurestr; /* Single 'key=value" string being parsed */ ++ static bool cpu_globals_initialized; ++ ++ if (cpu_globals_initialized) { ++ return; ++ } ++ cpu_globals_initialized = true; ++ ++ if (!features) { ++ return; ++ } ++ for (featurestr = strtok(features, ","); ++ featurestr; ++ featurestr = strtok(NULL, ",")) { ++ const char *name; ++ const char *val = NULL; ++ char *eq = NULL; ++ ++ /* Compatibility syntax: */ ++ if (featurestr[0] == '+') { ++ plus_features = g_list_append(plus_features, ++ g_strdup(featurestr + 1)); ++ continue; ++ } else if (featurestr[0] == '-') { ++ minus_features = g_list_append(minus_features, ++ g_strdup(featurestr + 1)); ++ continue; ++ } ++ ++ eq = strchr(featurestr, '='); ++ name = featurestr; ++ if (eq) { ++ *eq++ = 0; ++ val = eq; ++ } else { ++ error_setg(errp, "Unsupported property format: %s", name); ++ return; ++ } ++ ++ if (g_list_find_custom(plus_features, name, compare_string)) { ++ warn_report("Ambiguous CPU model string. 
" ++ "Don't mix both \"+%s\" and \"%s=%s\"", ++ name, name, val); ++ } ++ if (g_list_find_custom(minus_features, name, compare_string)) { ++ warn_report("Ambiguous CPU model string. " ++ "Don't mix both \"-%s\" and \"%s=%s\"", ++ name, name, val); ++ } ++ cpu_add_feat_as_prop(typename, name, val); ++ } ++ ++ for (l = plus_features; l; l = l->next) { ++ cpu_add_feat_as_prop(typename, l->data, "on"); ++ } ++ ++ for (l = minus_features; l; l = l->next) { ++ cpu_add_feat_as_prop(typename, l->data, "off"); ++ } ++} ++ + static void aarch64_cpu_class_init(ObjectClass *oc, void *data) + { + CPUClass *cc = CPU_CLASS(oc); +@@ -517,6 +599,7 @@ static void aarch64_cpu_class_init(ObjectClass *oc, void *data) + cc->gdb_num_core_regs = 34; + cc->gdb_core_xml_file = "aarch64-core.xml"; + cc->gdb_arch_name = aarch64_gdb_arch_name; ++ cc->parse_features = aarch64_cpu_parse_features; + } + + static void aarch64_cpu_instance_init(Object *obj) +-- +2.25.1 + diff --git a/target-arm-register-CPU-features-for-property.patch b/target-arm-register-CPU-features-for-property.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea42a63ea7620fc790d01ae94590fb4336e12c32 --- /dev/null +++ b/target-arm-register-CPU-features-for-property.patch @@ -0,0 +1,398 @@ +From f169b1f76cad9f727c701df853b05ad5e8d7f927 Mon Sep 17 00:00:00 2001 +From: Peng Liang +Date: Thu, 6 Aug 2020 16:14:37 +0800 +Subject: [PATCH 3/9] target/arm: register CPU features for property + +The Arm architecture specifies a number of ID registers that are +characterized as comprising a set of 4-bit ID fields. Each ID field +identifies the presence, and possibly the level of support for, a +particular feature in an implementation of the architecture. [1] + +For most of the ID fields, there is a minimum presence value, equal to +or higher than which means the corresponding CPU feature is implemented. 
+Hence, we can use the minimum presence value to determine whether a CPU +feature is enabled and enable a CPU feature. + +To disable a CPU feature, setting the corresponding ID field to 0x0/0xf +(for unsigned/signed field) seems as a good idea. However, it maybe +lead to some problems. For example, ID_AA64PFR0_EL1.FP is a signed ID +field. ID_AA64PFR0_EL1.FP == 0x0 represents the implementation of FP +(floating-point) and ID_AA64PFR0_EL1.FP == 0x1 represents the +implementation of FPHP (half-precision floating-point). If +ID_AA64PFR0_EL1.FP is set to 0xf when FPHP is disabled (which is also +disable FP), guest kernel maybe stuck. Hence, we add a ni_value (means +not-implemented value) to disable a CPU feature safely. + +[1] D13.1.3 Principles of the ID scheme for fields in ID registers in + DDI.0487 + +Signed-off-by: zhanghailiang +Signed-off-by: Peng Liang +--- + target/arm/cpu.c | 343 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 343 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 5bcdad0c..3f63312c 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -1034,6 +1034,347 @@ static void arm_set_init_svtor(Object *obj, Visitor *v, const char *name, + visit_type_uint32(v, name, &cpu->init_svtor, errp); + } + ++/** ++ * CPUFeatureInfo: ++ * @reg: The ID register where the ID field is in. ++ * @name: The name of the CPU feature. ++ * @length: The bit length of the ID field. ++ * @shift: The bit shift of the ID field in the ID register. ++ * @min_value: The minimum value equal to or larger than which means the CPU ++ * feature is implemented. ++ * @ni_value: Not-implemented value. It will be set to the ID field when ++ * disabling the CPU feature. Usually, it's min_value - 1. ++ * @sign: Whether the ID field is signed. ++ * @is_32bit: Whether the CPU feature is for 32-bit. ++ * ++ * In ARM, a CPU feature is described by an ID field, which is a 4-bit field in ++ * an ID register. 
++ */ ++typedef struct CPUFeatureInfo { ++ CPUIDReg reg; ++ const char *name; ++ int length; ++ int shift; ++ int min_value; ++ int ni_value; ++ bool sign; ++ bool is_32bit; ++} CPUFeatureInfo; ++ ++#define FIELD_INFO(feature_name, id_reg, field, s, min_val, ni_val, is32bit) { \ ++ .reg = id_reg, \ ++ .length = R_ ## id_reg ## _ ## field ## _LENGTH, \ ++ .shift = R_ ## id_reg ## _ ## field ## _SHIFT, \ ++ .sign = s, \ ++ .min_value = min_val, \ ++ .ni_value = ni_val, \ ++ .name = feature_name, \ ++ .is_32bit = is32bit, \ ++} ++ ++static struct CPUFeatureInfo cpu_features[] = { ++ FIELD_INFO("swap", ID_ISAR0, SWAP, false, 1, 0, true), ++ FIELD_INFO("bitcount", ID_ISAR0, BITCOUNT, false, 1, 0, true), ++ FIELD_INFO("bitfield", ID_ISAR0, BITFIELD, false, 1, 0, true), ++ FIELD_INFO("cmpbranch", ID_ISAR0, CMPBRANCH, false, 1, 0, true), ++ FIELD_INFO("coproc", ID_ISAR0, COPROC, false, 1, 0, true), ++ FIELD_INFO("debug", ID_ISAR0, DEBUG, false, 1, 0, true), ++ FIELD_INFO("device", ID_ISAR0, DIVIDE, false, 1, 0, true), ++ ++ FIELD_INFO("endian", ID_ISAR1, ENDIAN, false, 1, 0, true), ++ FIELD_INFO("except", ID_ISAR1, EXCEPT, false, 1, 0, true), ++ FIELD_INFO("except_ar", ID_ISAR1, EXCEPT_AR, false, 1, 0, true), ++ FIELD_INFO("extend", ID_ISAR1, EXTEND, false, 1, 0, true), ++ FIELD_INFO("ifthen", ID_ISAR1, IFTHEN, false, 1, 0, true), ++ FIELD_INFO("immediate", ID_ISAR1, IMMEDIATE, false, 1, 0, true), ++ FIELD_INFO("interwork", ID_ISAR1, INTERWORK, false, 1, 0, true), ++ FIELD_INFO("jazelle", ID_ISAR1, JAZELLE, false, 1, 0, true), ++ ++ FIELD_INFO("loadstore", ID_ISAR2, LOADSTORE, false, 1, 0, true), ++ FIELD_INFO("memhint", ID_ISAR2, MEMHINT, false, 1, 0, true), ++ FIELD_INFO("multiaccessint", ID_ISAR2, MULTIACCESSINT, false, 1, 0, true), ++ FIELD_INFO("mult", ID_ISAR2, MULT, false, 1, 0, true), ++ FIELD_INFO("mults", ID_ISAR2, MULTS, false, 1, 0, true), ++ FIELD_INFO("multu", ID_ISAR2, MULTU, false, 1, 0, true), ++ FIELD_INFO("psr_ar", ID_ISAR2, PSR_AR, false, 1, 0, true), 
++ FIELD_INFO("reversal", ID_ISAR2, REVERSAL, false, 1, 0, true), ++ ++ FIELD_INFO("saturate", ID_ISAR3, SATURATE, false, 1, 0, true), ++ FIELD_INFO("simd", ID_ISAR3, SIMD, false, 1, 0, true), ++ FIELD_INFO("svc", ID_ISAR3, SVC, false, 1, 0, true), ++ FIELD_INFO("synchprim", ID_ISAR3, SYNCHPRIM, false, 1, 0, true), ++ FIELD_INFO("tabbranch", ID_ISAR3, TABBRANCH, false, 1, 0, true), ++ FIELD_INFO("t32copy", ID_ISAR3, T32COPY, false, 1, 0, true), ++ FIELD_INFO("truenop", ID_ISAR3, TRUENOP, false, 1, 0, true), ++ FIELD_INFO("t32ee", ID_ISAR3, T32EE, false, 1, 0, true), ++ ++ FIELD_INFO("unpriv", ID_ISAR4, UNPRIV, false, 1, 0, true), ++ FIELD_INFO("withshifts", ID_ISAR4, WITHSHIFTS, false, 1, 0, true), ++ FIELD_INFO("writeback", ID_ISAR4, WRITEBACK, false, 1, 0, true), ++ FIELD_INFO("smc", ID_ISAR4, SMC, false, 1, 0, true), ++ FIELD_INFO("barrier", ID_ISAR4, BARRIER, false, 1, 0, true), ++ FIELD_INFO("synchprim_frac", ID_ISAR4, SYNCHPRIM_FRAC, false, 1, 0, true), ++ FIELD_INFO("psr_m", ID_ISAR4, PSR_M, false, 1, 0, true), ++ FIELD_INFO("swp_frac", ID_ISAR4, SWP_FRAC, false, 1, 0, true), ++ ++ FIELD_INFO("sevl", ID_ISAR5, SEVL, false, 1, 0, true), ++ FIELD_INFO("aes", ID_ISAR5, AES, false, 1, 0, true), ++ FIELD_INFO("sha1", ID_ISAR5, SHA1, false, 1, 0, true), ++ FIELD_INFO("sha2", ID_ISAR5, SHA2, false, 1, 0, true), ++ FIELD_INFO("crc32", ID_ISAR5, CRC32, false, 1, 0, true), ++ FIELD_INFO("rdm", ID_ISAR5, RDM, false, 1, 0, true), ++ FIELD_INFO("vcma", ID_ISAR5, VCMA, false, 1, 0, true), ++ ++ FIELD_INFO("jscvt", ID_ISAR6, JSCVT, false, 1, 0, true), ++ FIELD_INFO("dp", ID_ISAR6, DP, false, 1, 0, true), ++ FIELD_INFO("fhm", ID_ISAR6, FHM, false, 1, 0, true), ++ FIELD_INFO("sb", ID_ISAR6, SB, false, 1, 0, true), ++ FIELD_INFO("specres", ID_ISAR6, SPECRES, false, 1, 0, true), ++ ++ FIELD_INFO("cmaintva", ID_MMFR3, CMAINTVA, false, 1, 0, true), ++ FIELD_INFO("cmaintsw", ID_MMFR3, CMAINTSW, false, 1, 0, true), ++ FIELD_INFO("bpmaint", ID_MMFR3, BPMAINT, false, 1, 0, true), ++ 
FIELD_INFO("maintbcst", ID_MMFR3, MAINTBCST, false, 1, 0, true), ++ FIELD_INFO("pan", ID_MMFR3, PAN, false, 1, 0, true), ++ FIELD_INFO("cohwalk", ID_MMFR3, COHWALK, false, 1, 0, true), ++ FIELD_INFO("cmemsz", ID_MMFR3, CMEMSZ, false, 1, 0, true), ++ FIELD_INFO("supersec", ID_MMFR3, SUPERSEC, false, 1, 0, true), ++ ++ FIELD_INFO("specsei", ID_MMFR4, SPECSEI, false, 1, 0, true), ++ FIELD_INFO("ac2", ID_MMFR4, AC2, false, 1, 0, true), ++ FIELD_INFO("xnx", ID_MMFR4, XNX, false, 1, 0, true), ++ FIELD_INFO("cnp", ID_MMFR4, CNP, false, 1, 0, true), ++ FIELD_INFO("hpds", ID_MMFR4, HPDS, false, 1, 0, true), ++ FIELD_INFO("lsm", ID_MMFR4, LSM, false, 1, 0, true), ++ FIELD_INFO("ccidx", ID_MMFR4, CCIDX, false, 1, 0, true), ++ FIELD_INFO("evt", ID_MMFR4, EVT, false, 1, 0, true), ++ ++ FIELD_INFO("simdreg", MVFR0, SIMDREG, false, 1, 0, true), ++ FIELD_INFO("fpsp", MVFR0, FPSP, false, 1, 0, true), ++ FIELD_INFO("fpdp", MVFR0, FPDP, false, 1, 0, true), ++ FIELD_INFO("fptrap", MVFR0, FPTRAP, false, 1, 0, true), ++ FIELD_INFO("fpdivide", MVFR0, FPDIVIDE, false, 1, 0, true), ++ FIELD_INFO("fpsqrt", MVFR0, FPSQRT, false, 1, 0, true), ++ FIELD_INFO("fpshvec", MVFR0, FPSHVEC, false, 1, 0, true), ++ FIELD_INFO("fpround", MVFR0, FPROUND, false, 1, 0, true), ++ ++ FIELD_INFO("fpftz", MVFR1, FPFTZ, false, 1, 0, true), ++ FIELD_INFO("fpdnan", MVFR1, FPDNAN, false, 1, 0, true), ++ FIELD_INFO("simdls", MVFR1, SIMDLS, false, 1, 0, true), ++ FIELD_INFO("simdint", MVFR1, SIMDINT, false, 1, 0, true), ++ FIELD_INFO("simdsp", MVFR1, SIMDSP, false, 1, 0, true), ++ FIELD_INFO("simdhp", MVFR1, SIMDHP, false, 1, 0, true), ++ FIELD_INFO("fphp", MVFR1, FPHP, false, 1, 0, true), ++ FIELD_INFO("simdfmac", MVFR1, SIMDFMAC, false, 1, 0, true), ++ ++ FIELD_INFO("simdmisc", MVFR2, SIMDMISC, false, 1, 0, true), ++ FIELD_INFO("fpmisc", MVFR2, FPMISC, false, 1, 0, true), ++ ++ FIELD_INFO("debugver", ID_AA64DFR0, DEBUGVER, false, 1, 0, false), ++ FIELD_INFO("tracever", ID_AA64DFR0, TRACEVER, false, 1, 0, false), 
++ FIELD_INFO("pmuver", ID_AA64DFR0, PMUVER, false, 1, 0, false), ++ FIELD_INFO("brps", ID_AA64DFR0, BRPS, false, 1, 0, false), ++ FIELD_INFO("wrps", ID_AA64DFR0, WRPS, false, 1, 0, false), ++ FIELD_INFO("ctx_cmps", ID_AA64DFR0, CTX_CMPS, false, 1, 0, false), ++ FIELD_INFO("pmsver", ID_AA64DFR0, PMSVER, false, 1, 0, false), ++ FIELD_INFO("doublelock", ID_AA64DFR0, DOUBLELOCK, false, 1, 0, false), ++ FIELD_INFO("tracefilt", ID_AA64DFR0, TRACEFILT, false, 1, 0, false), ++ ++ FIELD_INFO("aes", ID_AA64ISAR0, AES, false, 1, 0, false), ++ FIELD_INFO("sha1", ID_AA64ISAR0, SHA1, false, 1, 0, false), ++ FIELD_INFO("sha2", ID_AA64ISAR0, SHA2, false, 1, 0, false), ++ FIELD_INFO("crc32", ID_AA64ISAR0, CRC32, false, 1, 0, false), ++ FIELD_INFO("atomics", ID_AA64ISAR0, ATOMIC, false, 1, 0, false), ++ FIELD_INFO("asimdrdm", ID_AA64ISAR0, RDM, false, 1, 0, false), ++ FIELD_INFO("sha3", ID_AA64ISAR0, SHA3, false, 1, 0, false), ++ FIELD_INFO("sm3", ID_AA64ISAR0, SM3, false, 1, 0, false), ++ FIELD_INFO("sm4", ID_AA64ISAR0, SM4, false, 1, 0, false), ++ FIELD_INFO("asimddp", ID_AA64ISAR0, DP, false, 1, 0, false), ++ FIELD_INFO("asimdfhm", ID_AA64ISAR0, FHM, false, 1, 0, false), ++ FIELD_INFO("flagm", ID_AA64ISAR0, TS, false, 1, 0, false), ++ FIELD_INFO("tlb", ID_AA64ISAR0, TLB, false, 1, 0, false), ++ FIELD_INFO("rng", ID_AA64ISAR0, RNDR, false, 1, 0, false), ++ ++ FIELD_INFO("dcpop", ID_AA64ISAR1, DPB, false, 1, 0, false), ++ FIELD_INFO("papa", ID_AA64ISAR1, APA, false, 1, 0, false), ++ FIELD_INFO("api", ID_AA64ISAR1, API, false, 1, 0, false), ++ FIELD_INFO("jscvt", ID_AA64ISAR1, JSCVT, false, 1, 0, false), ++ FIELD_INFO("fcma", ID_AA64ISAR1, FCMA, false, 1, 0, false), ++ FIELD_INFO("lrcpc", ID_AA64ISAR1, LRCPC, false, 1, 0, false), ++ FIELD_INFO("pacg", ID_AA64ISAR1, GPA, false, 1, 0, false), ++ FIELD_INFO("gpi", ID_AA64ISAR1, GPI, false, 1, 0, false), ++ FIELD_INFO("frint", ID_AA64ISAR1, FRINTTS, false, 1, 0, false), ++ FIELD_INFO("sb", ID_AA64ISAR1, SB, false, 1, 0, false), ++ 
FIELD_INFO("specres", ID_AA64ISAR1, SPECRES, false, 1, 0, false), ++ ++ FIELD_INFO("el0", ID_AA64PFR0, EL0, false, 1, 0, false), ++ FIELD_INFO("el1", ID_AA64PFR0, EL1, false, 1, 0, false), ++ FIELD_INFO("el2", ID_AA64PFR0, EL2, false, 1, 0, false), ++ FIELD_INFO("el3", ID_AA64PFR0, EL3, false, 1, 0, false), ++ FIELD_INFO("fp", ID_AA64PFR0, FP, true, 0, 0xf, false), ++ FIELD_INFO("asimd", ID_AA64PFR0, ADVSIMD, true, 0, 0xf, false), ++ FIELD_INFO("gic", ID_AA64PFR0, GIC, false, 1, 0, false), ++ FIELD_INFO("ras", ID_AA64PFR0, RAS, false, 1, 0, false), ++ FIELD_INFO("sve", ID_AA64PFR0, SVE, false, 1, 0, false), ++ ++ FIELD_INFO("bti", ID_AA64PFR1, BT, false, 1, 0, false), ++ FIELD_INFO("sbss", ID_AA64PFR1, SBSS, false, 1, 0, false), ++ FIELD_INFO("mte", ID_AA64PFR1, MTE, false, 1, 0, false), ++ FIELD_INFO("ras_frac", ID_AA64PFR1, RAS_FRAC, false, 1, 0, false), ++ ++ FIELD_INFO("parange", ID_AA64MMFR0, PARANGE, false, 1, 0, false), ++ FIELD_INFO("asidbits", ID_AA64MMFR0, ASIDBITS, false, 1, 0, false), ++ FIELD_INFO("bigend", ID_AA64MMFR0, BIGEND, false, 1, 0, false), ++ FIELD_INFO("snsmem", ID_AA64MMFR0, SNSMEM, false, 1, 0, false), ++ FIELD_INFO("bigendel0", ID_AA64MMFR0, BIGENDEL0, false, 1, 0, false), ++ FIELD_INFO("tgran16", ID_AA64MMFR0, TGRAN16, false, 1, 0, false), ++ FIELD_INFO("tgran64", ID_AA64MMFR0, TGRAN64, false, 1, 0, false), ++ FIELD_INFO("tgran4", ID_AA64MMFR0, TGRAN4, false, 1, 0, false), ++ FIELD_INFO("tgran16_2", ID_AA64MMFR0, TGRAN16_2, false, 1, 0, false), ++ FIELD_INFO("tgran64_2", ID_AA64MMFR0, TGRAN64_2, false, 1, 0, false), ++ FIELD_INFO("tgran4_2", ID_AA64MMFR0, TGRAN4_2, false, 1, 0, false), ++ FIELD_INFO("exs", ID_AA64MMFR0, EXS, false, 1, 0, false), ++ ++ FIELD_INFO("hafdbs", ID_AA64MMFR1, HAFDBS, false, 1, 0, false), ++ FIELD_INFO("vmidbits", ID_AA64MMFR1, VMIDBITS, false, 1, 0, false), ++ FIELD_INFO("vh", ID_AA64MMFR1, VH, false, 1, 0, false), ++ FIELD_INFO("hpds", ID_AA64MMFR1, HPDS, false, 1, 0, false), ++ FIELD_INFO("lo", ID_AA64MMFR1, 
LO, false, 1, 0, false), ++ FIELD_INFO("pan", ID_AA64MMFR1, PAN, false, 1, 0, false), ++ FIELD_INFO("specsei", ID_AA64MMFR1, SPECSEI, false, 1, 0, false), ++ FIELD_INFO("xnx", ID_AA64MMFR1, XNX, false, 1, 0, false), ++ ++ FIELD_INFO("cnp", ID_AA64MMFR2, CNP, false, 1, 0, false), ++ FIELD_INFO("uao", ID_AA64MMFR2, UAO, false, 1, 0, false), ++ FIELD_INFO("lsm", ID_AA64MMFR2, LSM, false, 1, 0, false), ++ FIELD_INFO("iesb", ID_AA64MMFR2, IESB, false, 1, 0, false), ++ FIELD_INFO("varange", ID_AA64MMFR2, VARANGE, false, 1, 0, false), ++ FIELD_INFO("ccidx", ID_AA64MMFR2, CCIDX, false, 1, 0, false), ++ FIELD_INFO("nv", ID_AA64MMFR2, NV, false, 1, 0, false), ++ FIELD_INFO("st", ID_AA64MMFR2, ST, false, 1, 0, false), ++ FIELD_INFO("uscat", ID_AA64MMFR2, AT, false, 1, 0, false), ++ FIELD_INFO("ids", ID_AA64MMFR2, IDS, false, 1, 0, false), ++ FIELD_INFO("fwb", ID_AA64MMFR2, FWB, false, 1, 0, false), ++ FIELD_INFO("ttl", ID_AA64MMFR2, TTL, false, 1, 0, false), ++ FIELD_INFO("bbm", ID_AA64MMFR2, BBM, false, 1, 0, false), ++ FIELD_INFO("evt", ID_AA64MMFR2, EVT, false, 1, 0, false), ++ FIELD_INFO("e0pd", ID_AA64MMFR2, E0PD, false, 1, 0, false), ++ ++ FIELD_INFO("copdbg", ID_DFR0, COPDBG, false, 1, 0, false), ++ FIELD_INFO("copsdbg", ID_DFR0, COPSDBG, false, 1, 0, false), ++ FIELD_INFO("mmapdbg", ID_DFR0, MMAPDBG, false, 1, 0, false), ++ FIELD_INFO("coptrc", ID_DFR0, COPTRC, false, 1, 0, false), ++ FIELD_INFO("mmaptrc", ID_DFR0, MMAPTRC, false, 1, 0, false), ++ FIELD_INFO("mprofdbg", ID_DFR0, MPROFDBG, false, 1, 0, false), ++ FIELD_INFO("perfmon", ID_DFR0, PERFMON, false, 1, 0, false), ++ FIELD_INFO("tracefilt", ID_DFR0, TRACEFILT, false, 1, 0, false), ++ ++ { ++ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_FP_LENGTH, ++ .shift = R_ID_AA64PFR0_FP_SHIFT, .sign = true, .min_value = 1, ++ .ni_value = 0, .name = "fphp", .is_32bit = false, ++ }, ++ { ++ .reg = ID_AA64PFR0, .length = R_ID_AA64PFR0_ADVSIMD_LENGTH, ++ .shift = R_ID_AA64PFR0_ADVSIMD_SHIFT, .sign = true, .min_value = 1, ++ 
.ni_value = 0, .name = "asimdhp", .is_32bit = false, ++ }, ++ { ++ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_AES_LENGTH, ++ .shift = R_ID_AA64ISAR0_AES_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "pmull", .is_32bit = false, ++ }, ++ { ++ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_SHA2_LENGTH, ++ .shift = R_ID_AA64ISAR0_SHA2_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "sha512", .is_32bit = false, ++ }, ++ { ++ .reg = ID_AA64ISAR0, .length = R_ID_AA64ISAR0_TS_LENGTH, ++ .shift = R_ID_AA64ISAR0_TS_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "flagm2", .is_32bit = false, ++ }, ++ { ++ .reg = ID_AA64ISAR1, .length = R_ID_AA64ISAR1_DPB_LENGTH, ++ .shift = R_ID_AA64ISAR1_DPB_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "dcpodp", .is_32bit = false, ++ }, ++ { ++ .reg = ID_AA64ISAR1, .length = R_ID_AA64ISAR1_LRCPC_LENGTH, ++ .shift = R_ID_AA64ISAR1_LRCPC_SHIFT, .sign = false, .min_value = 2, ++ .ni_value = 1, .name = "ilrcpc", .is_32bit = false, ++ }, ++}; ++ ++static void arm_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ ARMCPU *cpu = ARM_CPU(obj); ++ CPUFeatureInfo *feat = opaque; ++ int field_value = feat->sign ? 
sextract64(cpu->isar.regs[feat->reg], ++ feat->shift, feat->length) : ++ extract64(cpu->isar.regs[feat->reg], ++ feat->shift, feat->length); ++ bool value = field_value >= feat->min_value; ++ ++ visit_type_bool(v, name, &value, errp); ++} ++ ++static void arm_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ DeviceState *dev = DEVICE(obj); ++ ARMCPU *cpu = ARM_CPU(obj); ++ ARMISARegisters *isar = &cpu->isar; ++ CPUFeatureInfo *feat = opaque; ++ Error *local_err = NULL; ++ bool value; ++ ++ if (dev->realized) { ++ qdev_prop_set_after_realize(dev, name, errp); ++ return; ++ } ++ ++ visit_type_bool(v, name, &value, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ if (value) { ++ isar->regs[feat->reg] = deposit64(isar->regs[feat->reg], ++ feat->shift, feat->length, ++ feat->min_value); ++ } else { ++ isar->regs[feat->reg] = deposit64(isar->regs[feat->reg], ++ feat->shift, feat->length, ++ feat->ni_value); ++ } ++} ++ ++static void arm_cpu_register_feature_props(ARMCPU *cpu) ++{ ++ int i; ++ int num = ARRAY_SIZE(cpu_features); ++ ObjectProperty *op; ++ CPUARMState *env = &cpu->env; ++ ++ for (i = 0; i < num; i++) { ++ if ((arm_feature(env, ARM_FEATURE_AARCH64) && cpu_features[i].is_32bit) ++ || (!arm_feature(env, ARM_FEATURE_AARCH64) && ++ cpu_features[i].is_32bit)) { ++ continue; ++ } ++ op = object_property_find(OBJECT(cpu), cpu_features[i].name, NULL); ++ if (!op) { ++ object_property_add(OBJECT(cpu), cpu_features[i].name, "bool", ++ arm_cpu_get_bit_prop, ++ arm_cpu_set_bit_prop, ++ NULL, &cpu_features[i], &error_abort); ++ } ++ } ++} ++ + void arm_cpu_post_init(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -1150,6 +1491,8 @@ void arm_cpu_post_init(Object *obj) + + qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property, + &error_abort); ++ ++ arm_cpu_register_feature_props(cpu); + } + + static void arm_cpu_finalizefn(Object *obj) +-- +2.25.1 + diff --git 
a/target-i386-Add-Snowridge-v2-no-MPX-CPU-model.patch b/target-i386-Add-Snowridge-v2-no-MPX-CPU-model.patch new file mode 100644 index 0000000000000000000000000000000000000000..eedd3356bf33b6fd02950740fa81330cbbed2895 --- /dev/null +++ b/target-i386-Add-Snowridge-v2-no-MPX-CPU-model.patch @@ -0,0 +1,43 @@ +From ce4bb30a650773833cd1e86afcaa30e47259085c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Sat, 12 Oct 2019 10:47:48 +0800 +Subject: [PATCH] target/i386: Add Snowridge-v2 (no MPX) CPU model + +Add new version of Snowridge CPU model that removes MPX feature. + +MPX support is being phased out by Intel. GCC has dropped it, Linux kernel +and KVM are also going to do that in the future. + +Signed-off-by: Xiaoyao Li +Message-Id: <20191012024748.127135-1-xiaoyao.li@intel.com> +Signed-off-by: Eduardo Habkost +--- + target/i386/cpu.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index d3742ef4ac..f09612f9da 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2668,6 +2668,18 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_6_EAX_ARAT, + .xlevel = 0x80000008, + .model_id = "Intel Atom Processor (SnowRidge)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { ++ .version = 2, ++ .props = (PropValue[]) { ++ { "mpx", "off" }, ++ { "model-id", "Intel Atom Processor (Snowridge, no MPX)" }, ++ { /* end of list */ }, ++ }, ++ }, ++ { /* end of list */ }, ++ }, + }, + { + .name = "KnightsMill", +-- +2.27.0 + diff --git a/target-i386-Add-missed-security-features-to-Cooperla.patch b/target-i386-Add-missed-security-features-to-Cooperla.patch new file mode 100644 index 0000000000000000000000000000000000000000..d17e0c00e9a638c2c1dd715a7c6f1f1eb5a14474 --- /dev/null +++ b/target-i386-Add-missed-security-features-to-Cooperla.patch @@ -0,0 +1,35 @@ +From 97d5c6c621569b011a2122423d0f630bd71de5ff Mon Sep 17 00:00:00 2001 +From: Jingyi Wang +Date: Fri, 9 Jul 2021 11:17:19 +0800 +Subject: 
[PATCH] target/i386: Add missed security features to Cooperlake CPU + model + +It lacks two security feature bits in MSR_IA32_ARCH_CAPABILITIES in +current Cooperlake CPU model, so add them. + +This is part of upstream commit 2dea9d9 + +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +Signed-off-by: Jingyi Wang +--- + target/i386/cpu.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 5329d73316..50d6ef9de4 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2420,7 +2420,8 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | +- MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO, ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX512_BF16, + /* +-- +2.27.0 + diff --git a/target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch b/target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch new file mode 100644 index 0000000000000000000000000000000000000000..3aff7ea35f37047933f6f4464b513feaa242cf69 --- /dev/null +++ b/target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch @@ -0,0 +1,47 @@ +From 05b13a8de90abc6c1cfeca8b9c436e60e6d3142e Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 25 Dec 2019 14:30:17 +0800 +Subject: [PATCH] target/i386: Add new bit definitions of + MSR_IA32_ARCH_CAPABILITIES + +The bit 6, 7 and 8 of MSR_IA32_ARCH_CAPABILITIES are recently disclosed +for some security issues. Add the definitions for them to be used by named +CPU models. 
+ +Signed-off-by: Xiaoyao Li +Message-Id: <20191225063018.20038-2-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini + +Signed-off-by: Jingyi Wang +--- + target/i386/cpu.h | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 58d8c48964..7ff8ddd464 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -743,12 +743,15 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_TOPOLOGY_LEVEL_DIE (5U << 8) + + /* MSR Feature Bits */ +-#define MSR_ARCH_CAP_RDCL_NO (1U << 0) +-#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) +-#define MSR_ARCH_CAP_RSBA (1U << 2) ++#define MSR_ARCH_CAP_RDCL_NO (1U << 0) ++#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) ++#define MSR_ARCH_CAP_RSBA (1U << 2) + #define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) +-#define MSR_ARCH_CAP_SSB_NO (1U << 4) +-#define MSR_ARCH_CAP_MDS_NO (1U << 5) ++#define MSR_ARCH_CAP_SSB_NO (1U << 4) ++#define MSR_ARCH_CAP_MDS_NO (1U << 5) ++#define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) ++#define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) ++#define MSR_ARCH_CAP_TAA_NO (1U << 8) + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +-- +2.27.0 + diff --git a/target-i386-Export-TAA_NO-bit-to-guests.patch b/target-i386-Export-TAA_NO-bit-to-guests.patch new file mode 100644 index 0000000000000000000000000000000000000000..8d995dc840bbace44c4d59b61f70167e960e2699 --- /dev/null +++ b/target-i386-Export-TAA_NO-bit-to-guests.patch @@ -0,0 +1,36 @@ +From c828229e1dc4a3d0837071db4c08f7860dc24755 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Mon, 18 Nov 2019 23:23:27 -0800 +Subject: [PATCH] target/i386: Export TAA_NO bit to guests + +TSX Async Abort (TAA) is a side channel attack on internal buffers in +some Intel processors similar to Microachitectural Data Sampling (MDS). + +Some future Intel processors will use the ARCH_CAP_TAA_NO bit in the +IA32_ARCH_CAPABILITIES MSR to report that they are not vulnerable to +TAA. 
Make this bit available to guests. + +Signed-off-by: Pawan Gupta +Signed-off-by: Paolo Bonzini + +Signed-off-by: Jingyi Wang +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 29836cb2a5..5af4fca350 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1209,7 +1209,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .feat_names = { + "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", + "ssb-no", "mds-no", "pschange-mc-no", NULL, +- NULL, NULL, NULL, NULL, ++ "taa-no", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +-- +2.27.0 + diff --git a/target-i386-Introduce-Denverton-CPU-model.patch b/target-i386-Introduce-Denverton-CPU-model.patch new file mode 100644 index 0000000000000000000000000000000000000000..3e9debe339e6d1b0f7c0ca0ad019129de668363b --- /dev/null +++ b/target-i386-Introduce-Denverton-CPU-model.patch @@ -0,0 +1,79 @@ +From 7d602cefa04f4992d913683c1a5826abc4806e41 Mon Sep 17 00:00:00 2001 +From: Tao Xu +Date: Thu, 18 Jul 2019 15:34:05 +0800 +Subject: [PATCH] target/i386: Introduce Denverton CPU model + +Denverton is the Atom Processor of Intel Harrisonville platform. 
+ +For more information: +https://ark.intel.com/content/www/us/en/ark/products/\ +codename/63508/denverton.html + +Signed-off-by: Tao Xu +Message-Id: <20190718073405.28301-1-tao3.xu@intel.com> +Signed-off-by: Eduardo Habkost +--- + target/i386/cpu.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 5af4fca350..d3742ef4ac 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2552,6 +2552,53 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (Icelake)", + }, ++ { ++ .name = "Denverton", ++ .level = 21, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 95, ++ .stepping = 1, ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_MONITOR | ++ CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | ++ CPUID_EXT_AES | CPUID_EXT_XSAVE | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_SMAP | ++ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ /* ++ * Missing: XSAVES (not supported by some Linux 
versions, ++ * including v4.1 to v4.12). ++ * KVM doesn't yet expose any XSAVES state save component, ++ * and the only one defined in Skylake (processor tracing) ++ * probably will block migration anyway. ++ */ ++ .features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Atom Processor (Denverton)", ++ }, + { + .name = "Snowridge", + .level = 27, +-- +2.27.0 + diff --git a/target-i386-add-PSCHANGE_NO-bit-for-the-ARCH_CAPABIL.patch b/target-i386-add-PSCHANGE_NO-bit-for-the-ARCH_CAPABIL.patch new file mode 100644 index 0000000000000000000000000000000000000000..6cba87b020b3f5a01fbc2f74958d8e9c03a9d1a2 --- /dev/null +++ b/target-i386-add-PSCHANGE_NO-bit-for-the-ARCH_CAPABIL.patch @@ -0,0 +1,32 @@ +From 4372535d5f2f50b24d14ec8a3393aebec938fb61 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 13 Nov 2019 15:54:35 +0100 +Subject: [PATCH] target/i386: add PSCHANGE_NO bit for the ARCH_CAPABILITIES + MSR + +This is required to disable ITLB multihit mitigations in nested +hypervisors. 
+ +Signed-off-by: Paolo Bonzini + +Signed-off-by: Jingyi Wang +--- + target/i386/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 50d6ef9de4..29836cb2a5 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1208,7 +1208,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", +- "ssb-no", "mds-no", NULL, NULL, ++ "ssb-no", "mds-no", "pschange-mc-no", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +-- +2.27.0 + diff --git a/target-i386-add-VMX-definitions.patch b/target-i386-add-VMX-definitions.patch new file mode 100644 index 0000000000000000000000000000000000000000..4365e3a7a1082abf15b5e9c51a7c3801e27ee806 --- /dev/null +++ b/target-i386-add-VMX-definitions.patch @@ -0,0 +1,164 @@ +From 9fb16fc548fca297086be0efe20345160660f340 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 1 Jul 2019 18:24:52 +0200 +Subject: [PATCH] target/i386: add VMX definitions + +These will be used to compile the list of VMX features for named +CPU models, and/or by the code that sets up the VMX MSRs. 
+ +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.h | 130 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 130 insertions(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 9a105b2251..b4be6ffb1f 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -452,6 +452,25 @@ typedef enum X86Seg { + #define MSR_IA32_BNDCFGS 0x00000d90 + #define MSR_IA32_XSS 0x00000da0 + ++#define MSR_IA32_VMX_BASIC 0x00000480 ++#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 ++#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 ++#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 ++#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 ++#define MSR_IA32_VMX_MISC 0x00000485 ++#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 ++#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 ++#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 ++#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 ++#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a ++#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b ++#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c ++#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d ++#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e ++#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f ++#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 ++#define MSR_IA32_VMX_VMFUNC 0x00000491 ++ + #define XSTATE_FP_BIT 0 + #define XSTATE_SSE_BIT 1 + #define XSTATE_YMM_BIT 2 +@@ -757,6 +776,117 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + + #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + ++/* VMX MSR features */ ++#define MSR_VMX_BASIC_VMCS_REVISION_MASK 0x7FFFFFFFull ++#define MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK (0x00001FFFull << 32) ++#define MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK (0x003C0000ull << 32) ++#define MSR_VMX_BASIC_DUAL_MONITOR (1ULL << 49) ++#define MSR_VMX_BASIC_INS_OUTS (1ULL << 54) ++#define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55) ++ ++#define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full ++#define MSR_VMX_MISC_STORE_LMA (1ULL << 5) ++#define MSR_VMX_MISC_ACTIVITY_HLT (1ULL << 6) ++#define 
MSR_VMX_MISC_ACTIVITY_SHUTDOWN (1ULL << 7) ++#define MSR_VMX_MISC_ACTIVITY_WAIT_SIPI (1ULL << 8) ++#define MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK 0x0E000000ull ++#define MSR_VMX_MISC_VMWRITE_VMEXIT (1ULL << 29) ++#define MSR_VMX_MISC_ZERO_LEN_INJECT (1ULL << 30) ++ ++#define MSR_VMX_EPT_EXECONLY (1ULL << 0) ++#define MSR_VMX_EPT_PAGE_WALK_LENGTH_4 (1ULL << 6) ++#define MSR_VMX_EPT_PAGE_WALK_LENGTH_5 (1ULL << 7) ++#define MSR_VMX_EPT_UC (1ULL << 8) ++#define MSR_VMX_EPT_WB (1ULL << 14) ++#define MSR_VMX_EPT_2MB (1ULL << 16) ++#define MSR_VMX_EPT_1GB (1ULL << 17) ++#define MSR_VMX_EPT_INVEPT (1ULL << 20) ++#define MSR_VMX_EPT_AD_BITS (1ULL << 21) ++#define MSR_VMX_EPT_ADVANCED_VMEXIT_INFO (1ULL << 22) ++#define MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT (1ULL << 25) ++#define MSR_VMX_EPT_INVEPT_ALL_CONTEXT (1ULL << 26) ++#define MSR_VMX_EPT_INVVPID (1ULL << 32) ++#define MSR_VMX_EPT_INVVPID_SINGLE_ADDR (1ULL << 40) ++#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT (1ULL << 41) ++#define MSR_VMX_EPT_INVVPID_ALL_CONTEXT (1ULL << 42) ++#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS (1ULL << 43) ++ ++#define MSR_VMX_VMFUNC_EPT_SWITCHING (1ULL << 0) ++ ++ ++/* VMX controls */ ++#define VMX_CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 ++#define VMX_CPU_BASED_USE_TSC_OFFSETING 0x00000008 ++#define VMX_CPU_BASED_HLT_EXITING 0x00000080 ++#define VMX_CPU_BASED_INVLPG_EXITING 0x00000200 ++#define VMX_CPU_BASED_MWAIT_EXITING 0x00000400 ++#define VMX_CPU_BASED_RDPMC_EXITING 0x00000800 ++#define VMX_CPU_BASED_RDTSC_EXITING 0x00001000 ++#define VMX_CPU_BASED_CR3_LOAD_EXITING 0x00008000 ++#define VMX_CPU_BASED_CR3_STORE_EXITING 0x00010000 ++#define VMX_CPU_BASED_CR8_LOAD_EXITING 0x00080000 ++#define VMX_CPU_BASED_CR8_STORE_EXITING 0x00100000 ++#define VMX_CPU_BASED_TPR_SHADOW 0x00200000 ++#define VMX_CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 ++#define VMX_CPU_BASED_MOV_DR_EXITING 0x00800000 ++#define VMX_CPU_BASED_UNCOND_IO_EXITING 0x01000000 ++#define VMX_CPU_BASED_USE_IO_BITMAPS 0x02000000 
++#define VMX_CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 ++#define VMX_CPU_BASED_USE_MSR_BITMAPS 0x10000000 ++#define VMX_CPU_BASED_MONITOR_EXITING 0x20000000 ++#define VMX_CPU_BASED_PAUSE_EXITING 0x40000000 ++#define VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 ++ ++#define VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 ++#define VMX_SECONDARY_EXEC_ENABLE_EPT 0x00000002 ++#define VMX_SECONDARY_EXEC_DESC 0x00000004 ++#define VMX_SECONDARY_EXEC_RDTSCP 0x00000008 ++#define VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 ++#define VMX_SECONDARY_EXEC_ENABLE_VPID 0x00000020 ++#define VMX_SECONDARY_EXEC_WBINVD_EXITING 0x00000040 ++#define VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 ++#define VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 ++#define VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 ++#define VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 ++#define VMX_SECONDARY_EXEC_RDRAND_EXITING 0x00000800 ++#define VMX_SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 ++#define VMX_SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 ++#define VMX_SECONDARY_EXEC_SHADOW_VMCS 0x00004000 ++#define VMX_SECONDARY_EXEC_ENCLS_EXITING 0x00008000 ++#define VMX_SECONDARY_EXEC_RDSEED_EXITING 0x00010000 ++#define VMX_SECONDARY_EXEC_ENABLE_PML 0x00020000 ++#define VMX_SECONDARY_EXEC_XSAVES 0x00100000 ++ ++#define VMX_PIN_BASED_EXT_INTR_MASK 0x00000001 ++#define VMX_PIN_BASED_NMI_EXITING 0x00000008 ++#define VMX_PIN_BASED_VIRTUAL_NMIS 0x00000020 ++#define VMX_PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 ++#define VMX_PIN_BASED_POSTED_INTR 0x00000080 ++ ++#define VMX_VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 ++#define VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 ++#define VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 ++#define VMX_VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 ++#define VMX_VM_EXIT_SAVE_IA32_PAT 0x00040000 ++#define VMX_VM_EXIT_LOAD_IA32_PAT 0x00080000 ++#define VMX_VM_EXIT_SAVE_IA32_EFER 0x00100000 ++#define VMX_VM_EXIT_LOAD_IA32_EFER 0x00200000 ++#define 
VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 ++#define VMX_VM_EXIT_CLEAR_BNDCFGS 0x00800000 ++#define VMX_VM_EXIT_PT_CONCEAL_PIP 0x01000000 ++#define VMX_VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 ++ ++#define VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 ++#define VMX_VM_ENTRY_IA32E_MODE 0x00000200 ++#define VMX_VM_ENTRY_SMM 0x00000400 ++#define VMX_VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 ++#define VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 ++#define VMX_VM_ENTRY_LOAD_IA32_PAT 0x00004000 ++#define VMX_VM_ENTRY_LOAD_IA32_EFER 0x00008000 ++#define VMX_VM_ENTRY_LOAD_BNDCFGS 0x00010000 ++#define VMX_VM_ENTRY_PT_CONCEAL_PIP 0x00020000 ++#define VMX_VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 ++ + /* Supported Hyper-V Enlightenments */ + #define HYPERV_FEAT_RELAXED 0 + #define HYPERV_FEAT_VAPIC 1 +-- +2.27.0 + diff --git a/target-i386-add-VMX-features-to-named-CPU-models.patch b/target-i386-add-VMX-features-to-named-CPU-models.patch new file mode 100644 index 0000000000000000000000000000000000000000..ab42b83785e5e5fd463f64415611b98ebe06066b --- /dev/null +++ b/target-i386-add-VMX-features-to-named-CPU-models.patch @@ -0,0 +1,980 @@ +From 5a63a16d709c89b25a0a9c3c7fdf765f26dac312 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 20 Nov 2019 18:37:53 +0100 +Subject: [PATCH] target/i386: add VMX features to named CPU models + +This allows using "-cpu Haswell,+vmx", which we did not really want to +support in QEMU but was produced by Libvirt when using the "host-model" +CPU model. Without this patch, no VMX feature is _actually_ supported +(only the basic instruction set extensions are) and KVM fails to load +in the guest. 
+ +This was produced from the output of scripts/kvm/vmxcap using the following +very ugly Python script: + + bits = { + 'INS/OUTS instruction information': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_INS_OUTS'], + 'IA32_VMX_TRUE_*_CTLS support': ['FEAT_VMX_BASIC', 'MSR_VMX_BASIC_TRUE_CTLS'], + 'External interrupt exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_EXT_INTR_MASK'], + 'NMI exiting': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_NMI_EXITING'], + 'Virtual NMIs': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VIRTUAL_NMIS'], + 'Activate VMX-preemption timer': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_VMX_PREEMPTION_TIMER'], + 'Process posted interrupts': ['FEAT_VMX_PINBASED_CTLS', 'VMX_PIN_BASED_POSTED_INTR'], + 'Interrupt window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_INTR_PENDING'], + 'Use TSC offsetting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_TSC_OFFSETING'], + 'HLT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_HLT_EXITING'], + 'INVLPG exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_INVLPG_EXITING'], + 'MWAIT exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MWAIT_EXITING'], + 'RDPMC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDPMC_EXITING'], + 'RDTSC exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_RDTSC_EXITING'], + 'CR3-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_LOAD_EXITING'], + 'CR3-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR3_STORE_EXITING'], + 'CR8-load exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_LOAD_EXITING'], + 'CR8-store exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_CR8_STORE_EXITING'], + 'Use TPR shadow': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_TPR_SHADOW'], + 'NMI-window exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_VIRTUAL_NMI_PENDING'], + 'MOV-DR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MOV_DR_EXITING'], + 'Unconditional I/O exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_UNCOND_IO_EXITING'], + 'Use I/O bitmaps': 
['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_IO_BITMAPS'], + 'Monitor trap flag': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_TRAP_FLAG'], + 'Use MSR bitmaps': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_USE_MSR_BITMAPS'], + 'MONITOR exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_MONITOR_EXITING'], + 'PAUSE exiting': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_PAUSE_EXITING'], + 'Activate secondary control': ['FEAT_VMX_PROCBASED_CTLS', 'VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS'], + 'Virtualize APIC accesses': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES'], + 'Enable EPT': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_EPT'], + 'Descriptor-table exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_DESC'], + 'Enable RDTSCP': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDTSCP'], + 'Virtualize x2APIC mode': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE'], + 'Enable VPID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VPID'], + 'WBINVD exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_WBINVD_EXITING'], + 'Unrestricted guest': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST'], + 'APIC register emulation': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT'], + 'Virtual interrupt delivery': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY'], + 'PAUSE-loop exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING'], + 'RDRAND exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDRAND_EXITING'], + 'Enable INVPCID': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_INVPCID'], + 'Enable VM functions': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_ENABLE_VMFUNC'], + 'VMCS shadowing': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_SHADOW_VMCS'], + 'RDSEED exiting': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_RDSEED_EXITING'], + 'Enable PML': ['FEAT_VMX_SECONDARY_CTLS', 
'VMX_SECONDARY_EXEC_ENABLE_PML'], + 'Enable XSAVES/XRSTORS': ['FEAT_VMX_SECONDARY_CTLS', 'VMX_SECONDARY_EXEC_XSAVES'], + 'Save debug controls': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_DEBUG_CONTROLS'], + 'Load IA32_PERF_GLOBAL_CTRL': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL'], + 'Acknowledge interrupt on exit': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_ACK_INTR_ON_EXIT'], + 'Save IA32_PAT': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_IA32_PAT'], + 'Load IA32_PAT': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_LOAD_IA32_PAT'], + 'Save IA32_EFER': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_IA32_EFER'], + 'Load IA32_EFER': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_LOAD_IA32_EFER'], + 'Save VMX-preemption timer value': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER'], + 'Clear IA32_BNDCFGS': ['FEAT_VMX_EXIT_CTLS', 'VMX_VM_EXIT_CLEAR_BNDCFGS'], + 'Load debug controls': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS'], + 'IA-32e mode guest': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_IA32E_MODE'], + 'Load IA32_PERF_GLOBAL_CTRL': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL'], + 'Load IA32_PAT': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_IA32_PAT'], + 'Load IA32_EFER': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_IA32_EFER'], + 'Load IA32_BNDCFGS': ['FEAT_VMX_ENTRY_CTLS', 'VMX_VM_ENTRY_LOAD_BNDCFGS'], + 'Store EFER.LMA into IA-32e mode guest control': ['FEAT_VMX_MISC', 'MSR_VMX_MISC_STORE_LMA'], + 'HLT activity state': ['FEAT_VMX_MISC', 'MSR_VMX_MISC_ACTIVITY_HLT'], + 'VMWRITE to VM-exit information fields': ['FEAT_VMX_MISC', 'MSR_VMX_MISC_VMWRITE_VMEXIT'], + 'Inject event with insn length=0': ['FEAT_VMX_MISC', 'MSR_VMX_MISC_ZERO_LEN_INJECT'], + 'Execute-only EPT translations': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_EXECONLY'], + 'Page-walk length 4': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_PAGE_WALK_LENGTH_4'], + 'Paging-structure memory type WB': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_WB'], + '2MB EPT pages': 
['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB'], + 'INVEPT supported': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVEPT'], + 'EPT accessed and dirty flags': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_AD_BITS'], + 'Single-context INVEPT': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT'], + 'All-context INVEPT': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVEPT_ALL_CONTEXT'], + 'INVVPID supported': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID'], + 'Individual-address INVVPID': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID_SINGLE_ADDR'], + 'Single-context INVVPID': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT'], + 'All-context INVVPID': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID_ALL_CONTEXT'], + 'Single-context-retaining-globals INVVPID': ['FEAT_VMX_EPT_VPID_CAPS', 'MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS'], + 'EPTP Switching': ['FEAT_VMX_VMFUNC', 'MSR_VMX_VMFUNC_EPT_SWITCHING'] + } + + import sys + import textwrap + + out = {} + for l in sys.stdin.readlines(): + l = l.rstrip() + if l.endswith('!!'): + l = l[:-2].rstrip() + if l.startswith(' ') and (l.endswith('default') or l.endswith('yes')): + l = l[4:] + for key, value in bits.items(): + if l.startswith(key): + ctl, bit = value + if ctl in out: + out[ctl] = out[ctl] + ' | ' + else: + out[ctl] = ' [%s] = ' % ctl + out[ctl] = out[ctl] + bit + + for x in sorted(out.keys()): + print("\n ".join(textwrap.wrap(out[x] + ","))) + +Note that the script has a bug in that some keys apply to both VM entry +and VM exit controls ("load IA32_PERF_GLOBAL_CTRL", "load IA32_EFER", +"load IA32_PAT"). Those have to be fixed by hand. 
+ +Reviewed-by: Eduardo Habkost +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 705 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 705 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index fd248a78db..2f32d67aa5 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1799,6 +1799,34 @@ static CPUCaches epyc_cache_info = { + }, + }; + ++/* The following VMX features are not supported by KVM and are left out in the ++ * CPU definitions: ++ * ++ * Dual-monitor support (all processors) ++ * Entry to SMM ++ * Deactivate dual-monitor treatment ++ * Number of CR3-target values ++ * Shutdown activity state ++ * Wait-for-SIPI activity state ++ * PAUSE-loop exiting (Westmere and newer) ++ * EPT-violation #VE (Broadwell and newer) ++ * Inject event with insn length=0 (Skylake and newer) ++ * Conceal non-root operation from PT ++ * Conceal VM exits from PT ++ * Conceal VM entries from PT ++ * Enable ENCLS exiting ++ * Mode-based execute control (XS/XU) ++ s TSC scaling (Skylake Server and newer) ++ * GPA translation for PT (IceLake and newer) ++ * User wait and pause ++ * ENCLV exiting ++ * Load IA32_RTIT_CTL ++ * Clear IA32_RTIT_CTL ++ * Advanced VM-exit information for EPT violations ++ * Sub-page write permissions ++ * PT in VMX operation ++ */ ++ + static X86CPUDefinition builtin_x86_defs[] = { + { + .name = "qemu64", +@@ -1873,6 +1901,24 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, ++ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, ++ .features[FEAT_VMX_PROCBASED_CTLS] = 
VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES, + .xlevel = 0x80000008, + .model_id = "Intel(R) Core(TM)2 Duo CPU T7700 @ 2.40GHz", + }, +@@ -1900,6 +1946,20 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_EXT3_OSVW, CPUID_EXT3_IBS, CPUID_EXT3_SVM */ + .features[FEAT_8000_0001_ECX] = + 0, ++ /* VMX features from Cedar Mill/Prescott */ ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, ++ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING, + .xlevel = 0x80000008, + .model_id = "Common KVM processor" + }, +@@ -1931,6 +1991,19 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_EXT_SSE3, + .features[FEAT_8000_0001_ECX] = 
+ 0, ++ /* VMX features from Yonah */ ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, ++ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | VMX_CPU_BASED_USE_MSR_BITMAPS, + .xlevel = 0x80000008, + .model_id = "Common 32-bit KVM processor" + }, +@@ -1952,6 +2025,18 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_EXT_SSE3 | CPUID_EXT_MONITOR, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_NX, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, ++ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | VMX_CPU_BASED_USE_MSR_BITMAPS, + .xlevel = 0x80000008, + .model_id = "Genuine Intel(R) CPU T2600 @ 2.16GHz", + }, +@@ -2062,6 +2147,24 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, + 
.features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, ++ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES, + .xlevel = 0x80000008, + .model_id = "Intel Celeron_4x0 (Conroe/Merom Class Core 2)", + }, +@@ -2085,6 +2188,27 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, ++ .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | 
VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING, + .xlevel = 0x80000008, + .model_id = "Intel Core 2 Duo P9xxx (Penryn Class Core 2)", + }, +@@ -2108,6 +2232,46 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = 
MSR_VMX_MISC_ACTIVITY_HLT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID, + .xlevel = 0x80000008, + .model_id = "Intel Core i7 9xx (Nehalem Class Core i7)", + .versions = (X86CPUVersionDefinition[]) { +@@ -2148,6 +2312,47 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_EXT3_LAHF_LM, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ 
MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST, + .xlevel = 0x80000008, + .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)", + 
.versions = (X86CPUVersionDefinition[]) { +@@ -2193,6 +2398,47 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ 
VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST, + .xlevel = 0x80000008, + .model_id = "Intel Xeon E312xx (Sandy Bridge)", + .versions = (X86CPUVersionDefinition[]) { +@@ -2241,6 +2487,50 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | 
++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING, + .xlevel = 0x80000008, + .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)", + .versions = (X86CPUVersionDefinition[]) { +@@ -2292,6 +2582,52 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | 
MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ 
VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, ++ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Core Processor (Haswell)", + .versions = (X86CPUVersionDefinition[]) { +@@ -2376,6 +2712,53 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XSAVEOPT, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ 
.features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, ++ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Core Processor (Broadwell)", + .versions = (X86CPUVersionDefinition[]) { +@@ -2460,6 +2843,51 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | 
VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ 
VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, ++ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Core Processor (Skylake)", + .versions = (X86CPUVersionDefinition[]) { +@@ -2524,6 +2952,52 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ 
.features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (Skylake)", + .versions = (X86CPUVersionDefinition[]) { +@@ -2594,6 +3068,52 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = 
VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ 
.features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (Cascadelake)", + .versions = (X86CPUVersionDefinition[]) { +@@ -2724,6 +3244,51 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = 
MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, ++ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Core Processor (Icelake)", + }, +@@ -2782,6 +3347,52 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ 
.features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ 
VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (Icelake)", + }, +@@ -2829,6 +3440,53 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_6_EAX_ARAT, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ 
MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, ++ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Atom Processor (Denverton)", + }, +@@ -2899,6 +3557,53 @@ static X86CPUDefinition builtin_x86_defs[] = { + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, ++ .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | ++ 
MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | ++ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | ++ MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | ++ VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | ++ VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | ++ VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | ++ VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | ++ VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ 
VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | ++ VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, ++ .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Atom Processor (SnowRidge)", + .versions = (X86CPUVersionDefinition[]) { +-- +2.27.0 + diff --git a/target-i386-add-VMX-features.patch b/target-i386-add-VMX-features.patch new file mode 100644 index 0000000000000000000000000000000000000000..50457d7f2125e4db535a18c4798e6bab48d68393 --- /dev/null +++ b/target-i386-add-VMX-features.patch @@ -0,0 +1,492 @@ +From 290ed17e639a67a9faf4a18b1b5973f9535bace4 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 1 Jul 2019 18:32:17 +0200 +Subject: [PATCH] target/i386: add VMX features + +Add code to convert the VMX feature words back into MSR values, +allowing the user to enable/disable VMX features as they wish. The same +infrastructure enables support for limiting VMX features in named +CPU models. 
+ +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 225 ++++++++++++++++++++++++++++++++++++++++++++++ + target/i386/cpu.h | 9 ++ + target/i386/kvm.c | 162 ++++++++++++++++++++++++++++++++- + 3 files changed, 394 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 3d6541c4a8..fd248a78db 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1232,6 +1232,163 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .index = MSR_IA32_CORE_CAPABILITY, + }, + }, ++ ++ [FEAT_VMX_PROCBASED_CTLS] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ NULL, NULL, "vmx-vintr-pending", "vmx-tsc-offset", ++ NULL, NULL, NULL, "vmx-hlt-exit", ++ NULL, "vmx-invlpg-exit", "vmx-mwait-exit", "vmx-rdpmc-exit", ++ "vmx-rdtsc-exit", NULL, NULL, "vmx-cr3-load-noexit", ++ "vmx-cr3-store-noexit", NULL, NULL, "vmx-cr8-load-exit", ++ "vmx-cr8-store-exit", "vmx-flexpriority", "vmx-vnmi-pending", "vmx-movdr-exit", ++ "vmx-io-exit", "vmx-io-bitmap", NULL, "vmx-mtf", ++ "vmx-msr-bitmap", "vmx-monitor-exit", "vmx-pause-exit", "vmx-secondary-ctls", ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_TRUE_PROCBASED_CTLS, ++ } ++ }, ++ ++ [FEAT_VMX_SECONDARY_CTLS] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ "vmx-apicv-xapic", "vmx-ept", "vmx-desc-exit", "vmx-rdtscp-exit", ++ "vmx-apicv-x2apic", "vmx-vpid", "vmx-wbinvd-exit", "vmx-unrestricted-guest", ++ "vmx-apicv-register", "vmx-apicv-vid", "vmx-ple", "vmx-rdrand-exit", ++ "vmx-invpcid-exit", "vmx-vmfunc", "vmx-shadow-vmcs", "vmx-encls-exit", ++ "vmx-rdseed-exit", "vmx-pml", NULL, NULL, ++ "vmx-xsaves", NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_PROCBASED_CTLS2, ++ } ++ }, ++ ++ [FEAT_VMX_PINBASED_CTLS] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ "vmx-intr-exit", NULL, NULL, "vmx-nmi-exit", ++ NULL, "vmx-vnmi", "vmx-preemption-timer", "vmx-posted-intr", ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, 
NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_TRUE_PINBASED_CTLS, ++ } ++ }, ++ ++ [FEAT_VMX_EXIT_CTLS] = { ++ .type = MSR_FEATURE_WORD, ++ /* ++ * VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE is copied from ++ * the LM CPUID bit. ++ */ ++ .feat_names = { ++ NULL, NULL, "vmx-exit-nosave-debugctl", NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL /* vmx-exit-host-addr-space-size */, NULL, NULL, ++ "vmx-exit-load-perf-global-ctrl", NULL, NULL, "vmx-exit-ack-intr", ++ NULL, NULL, "vmx-exit-save-pat", "vmx-exit-load-pat", ++ "vmx-exit-save-efer", "vmx-exit-load-efer", ++ "vmx-exit-save-preemption-timer", "vmx-exit-clear-bndcfgs", ++ NULL, "vmx-exit-clear-rtit-ctl", NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_TRUE_EXIT_CTLS, ++ } ++ }, ++ ++ [FEAT_VMX_ENTRY_CTLS] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ NULL, NULL, "vmx-entry-noload-debugctl", NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, "vmx-entry-ia32e-mode", NULL, NULL, ++ NULL, "vmx-entry-load-perf-global-ctrl", "vmx-entry-load-pat", "vmx-entry-load-efer", ++ "vmx-entry-load-bndcfgs", NULL, "vmx-entry-load-rtit-ctl", NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_TRUE_ENTRY_CTLS, ++ } ++ }, ++ ++ [FEAT_VMX_MISC] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ NULL, NULL, NULL, NULL, ++ NULL, "vmx-store-lma", "vmx-activity-hlt", "vmx-activity-shutdown", ++ "vmx-activity-wait-sipi", NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, "vmx-vmwrite-vmexit-fields", "vmx-zero-len-inject", NULL, ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_MISC, ++ } ++ }, ++ ++ [FEAT_VMX_EPT_VPID_CAPS] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ "vmx-ept-execonly", NULL, NULL, NULL, ++ NULL, NULL, "vmx-page-walk-4", 
"vmx-page-walk-5", ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ "vmx-ept-2mb", "vmx-ept-1gb", NULL, NULL, ++ "vmx-invept", "vmx-eptad", "vmx-ept-advanced-exitinfo", NULL, ++ NULL, "vmx-invept-single-context", "vmx-invept-all-context", NULL, ++ NULL, NULL, NULL, NULL, ++ "vmx-invvpid", NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ "vmx-invvpid-single-addr", "vmx-invept-single-context", ++ "vmx-invvpid-all-context", "vmx-invept-single-context-noglobals", ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_EPT_VPID_CAP, ++ } ++ }, ++ ++ [FEAT_VMX_BASIC] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ [54] = "vmx-ins-outs", ++ [55] = "vmx-true-ctls", ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_BASIC, ++ }, ++ /* Just to be safe - we don't support setting the MSEG version field. */ ++ .no_autoenable_flags = MSR_VMX_BASIC_DUAL_MONITOR, ++ }, ++ ++ [FEAT_VMX_VMFUNC] = { ++ .type = MSR_FEATURE_WORD, ++ .feat_names = { ++ [0] = "vmx-eptp-switching", ++ }, ++ .msr = { ++ .index = MSR_IA32_VMX_VMFUNC, ++ } ++ }, ++ + }; + + typedef struct FeatureMask { +@@ -1252,6 +1409,74 @@ static FeatureDep feature_dependencies[] = { + .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, + .to = { FEAT_CORE_CAPABILITY, ~0ull }, + }, ++ { ++ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, ++ .to = { FEAT_VMX_PROCBASED_CTLS, ~0ull }, ++ }, ++ { ++ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, ++ .to = { FEAT_VMX_PINBASED_CTLS, ~0ull }, ++ }, ++ { ++ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, ++ .to = { FEAT_VMX_EXIT_CTLS, ~0ull }, ++ }, ++ { ++ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, ++ .to = { FEAT_VMX_ENTRY_CTLS, ~0ull }, ++ }, ++ { ++ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, ++ .to = { FEAT_VMX_MISC, ~0ull }, ++ }, ++ { ++ .from = { FEAT_1_ECX, CPUID_EXT_VMX }, ++ .to = { FEAT_VMX_BASIC, ~0ull }, ++ }, ++ { ++ .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM }, ++ .to 
= { FEAT_VMX_ENTRY_CTLS, VMX_VM_ENTRY_IA32E_MODE }, ++ }, ++ { ++ .from = { FEAT_VMX_PROCBASED_CTLS, VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS }, ++ .to = { FEAT_VMX_SECONDARY_CTLS, ~0ull }, ++ }, ++ { ++ .from = { FEAT_XSAVE, CPUID_XSAVE_XSAVES }, ++ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_XSAVES }, ++ }, ++ { ++ .from = { FEAT_1_ECX, CPUID_EXT_RDRAND }, ++ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDRAND_EXITING }, ++ }, ++ { ++ .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_INVPCID }, ++ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_INVPCID }, ++ }, ++ { ++ .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_RDSEED }, ++ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDSEED_EXITING }, ++ }, ++ { ++ .from = { FEAT_8000_0001_EDX, CPUID_EXT2_RDTSCP }, ++ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDTSCP }, ++ }, ++ { ++ .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_EPT }, ++ .to = { FEAT_VMX_EPT_VPID_CAPS, 0xffffffffull }, ++ }, ++ { ++ .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_EPT }, ++ .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST }, ++ }, ++ { ++ .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VPID }, ++ .to = { FEAT_VMX_EPT_VPID_CAPS, 0xffffffffull << 32 }, ++ }, ++ { ++ .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VMFUNC }, ++ .to = { FEAT_VMX_VMFUNC, ~0ull }, ++ }, + }; + + typedef struct X86RegisterInfo32 { +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index b4be6ffb1f..0b57b915af 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -518,6 +518,15 @@ typedef enum FeatureWord { + FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ + FEAT_ARCH_CAPABILITIES, + FEAT_CORE_CAPABILITY, ++ FEAT_VMX_PROCBASED_CTLS, ++ FEAT_VMX_SECONDARY_CTLS, ++ FEAT_VMX_PINBASED_CTLS, ++ FEAT_VMX_EXIT_CTLS, ++ FEAT_VMX_ENTRY_CTLS, ++ FEAT_VMX_MISC, ++ FEAT_VMX_EPT_VPID_CAPS, ++ FEAT_VMX_BASIC, ++ FEAT_VMX_VMFUNC, + FEATURE_WORDS, + } FeatureWord; + +diff 
--git a/target/i386/kvm.c b/target/i386/kvm.c +index e9a6293ab2..fafb9fb26d 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -96,6 +96,7 @@ static bool has_msr_virt_ssbd; + static bool has_msr_smi_count; + static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; ++static bool has_msr_vmx_vmfunc; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -443,7 +444,8 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) + struct kvm_msrs info; + struct kvm_msr_entry entries[1]; + } msr_data; +- uint32_t ret; ++ uint64_t value; ++ uint32_t ret, can_be_one, must_be_one; + + if (kvm_feature_msrs == NULL) { /* Host doesn't support feature MSRs */ + return 0; +@@ -469,7 +471,25 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) + exit(1); + } + +- return msr_data.entries[0].data; ++ value = msr_data.entries[0].data; ++ switch (index) { ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: ++ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ++ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: ++ case MSR_IA32_VMX_TRUE_EXIT_CTLS: ++ /* ++ * Return true for bits that can be one, but do not have to be one. ++ * The SDM tells us which bits could have a "must be one" setting, ++ * so we can do the opposite transformation in make_vmx_msr_value. 
++ */ ++ must_be_one = (uint32_t)value; ++ can_be_one = (uint32_t)(value >> 32); ++ return can_be_one & ~must_be_one; ++ ++ default: ++ return value; ++ } + } + + +@@ -1933,6 +1953,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_CORE_CAPABILITY: + has_msr_core_capabs = true; + break; ++ case MSR_IA32_VMX_VMFUNC: ++ has_msr_vmx_vmfunc = true; ++ break; + } + } + } +@@ -2407,6 +2430,132 @@ static int kvm_put_msr_feature_control(X86CPU *cpu) + return 0; + } + ++static uint64_t make_vmx_msr_value(uint32_t index, uint32_t features) ++{ ++ uint32_t default1, can_be_one, can_be_zero; ++ uint32_t must_be_one; ++ ++ switch (index) { ++ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: ++ default1 = 0x00000016; ++ break; ++ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: ++ default1 = 0x0401e172; ++ break; ++ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: ++ default1 = 0x000011ff; ++ break; ++ case MSR_IA32_VMX_TRUE_EXIT_CTLS: ++ default1 = 0x00036dff; ++ break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ default1 = 0; ++ break; ++ default: ++ abort(); ++ } ++ ++ /* If a feature bit is set, the control can be either set or clear. ++ * Otherwise the value is limited to either 0 or 1 by default1. ++ */ ++ can_be_one = features | default1; ++ can_be_zero = features | ~default1; ++ must_be_one = ~can_be_zero; ++ ++ /* ++ * Bit 0:31 -> 0 if the control bit can be zero (i.e. 1 if it must be one). ++ * Bit 32:63 -> 1 if the control bit can be one. 
++ */ ++ return must_be_one | (((uint64_t)can_be_one) << 32); ++} ++ ++#define VMCS12_MAX_FIELD_INDEX (0x17) ++ ++static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) ++{ ++ uint64_t kvm_vmx_basic = ++ kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_VMX_BASIC); ++ uint64_t kvm_vmx_misc = ++ kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_VMX_MISC); ++ uint64_t kvm_vmx_ept_vpid = ++ kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_VMX_EPT_VPID_CAP); ++ ++ /* ++ * If the guest is 64-bit, a value of 1 is allowed for the host address ++ * space size vmexit control. ++ */ ++ uint64_t fixed_vmx_exit = f[FEAT_8000_0001_EDX] & CPUID_EXT2_LM ++ ? (uint64_t)VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE << 32 : 0; ++ ++ /* ++ * Bits 0-30, 32-44 and 50-53 come from the host. KVM should ++ * not change them for backwards compatibility. ++ */ ++ uint64_t fixed_vmx_basic = kvm_vmx_basic & ++ (MSR_VMX_BASIC_VMCS_REVISION_MASK | ++ MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK | ++ MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK); ++ ++ /* ++ * Same for bits 0-4 and 25-27. Bits 16-24 (CR3 target count) can ++ * change in the future but are always zero for now, clear them to be ++ * future proof. Bits 32-63 in theory could change, though KVM does ++ * not support dual-monitor treatment and probably never will; mask ++ * them out as well. ++ */ ++ uint64_t fixed_vmx_misc = kvm_vmx_misc & ++ (MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK | ++ MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK); ++ ++ /* ++ * EPT memory types should not change either, so we do not bother ++ * adding features for them. ++ */ ++ uint64_t fixed_vmx_ept_mask = ++ (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_ENABLE_EPT ? 
++ MSR_VMX_EPT_UC | MSR_VMX_EPT_WB : 0); ++ uint64_t fixed_vmx_ept_vpid = kvm_vmx_ept_vpid & fixed_vmx_ept_mask; ++ ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, ++ make_vmx_msr_value(MSR_IA32_VMX_TRUE_PROCBASED_CTLS, ++ f[FEAT_VMX_PROCBASED_CTLS])); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, ++ make_vmx_msr_value(MSR_IA32_VMX_TRUE_PINBASED_CTLS, ++ f[FEAT_VMX_PINBASED_CTLS])); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, ++ make_vmx_msr_value(MSR_IA32_VMX_TRUE_EXIT_CTLS, ++ f[FEAT_VMX_EXIT_CTLS]) | fixed_vmx_exit); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, ++ make_vmx_msr_value(MSR_IA32_VMX_TRUE_ENTRY_CTLS, ++ f[FEAT_VMX_ENTRY_CTLS])); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_PROCBASED_CTLS2, ++ make_vmx_msr_value(MSR_IA32_VMX_PROCBASED_CTLS2, ++ f[FEAT_VMX_SECONDARY_CTLS])); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP, ++ f[FEAT_VMX_EPT_VPID_CAPS] | fixed_vmx_ept_vpid); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC, ++ f[FEAT_VMX_BASIC] | fixed_vmx_basic); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC, ++ f[FEAT_VMX_MISC] | fixed_vmx_misc); ++ if (has_msr_vmx_vmfunc) { ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMFUNC, f[FEAT_VMX_VMFUNC]); ++ } ++ ++ /* ++ * Just to be safe, write these with constant values. The CRn_FIXED1 ++ * MSRs are generated by KVM based on the vCPU's CPUID. ++ */ ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR0_FIXED0, ++ CR0_PE_MASK | CR0_PG_MASK | CR0_NE_MASK); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0, ++ CR4_VMXE_MASK); ++ kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, ++ VMCS12_MAX_FIELD_INDEX << 1); ++} ++ + static int kvm_put_msrs(X86CPU *cpu, int level) + { + CPUX86State *env = &cpu->env; +@@ -2646,7 +2795,16 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. 
*/ ++ ++ /* ++ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but ++ * all kernels with MSR features should have them. ++ */ ++ if (kvm_feature_msrs && cpu_has_vmx(env)) { ++ kvm_msr_entry_add_vmx(cpu, env->features); ++ } + } ++ + if (env->mcg_cap) { + int i; + +-- +2.27.0 + diff --git a/target-i386-add-a-ucode-rev-property.patch b/target-i386-add-a-ucode-rev-property.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a3ff6fd91f67dc93e12e75f9c84ce30dd7725f1 --- /dev/null +++ b/target-i386-add-a-ucode-rev-property.patch @@ -0,0 +1,125 @@ +From 9b3b22bfe87be7eec126056b96f7cea7e3ab9257 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:12 +0000 +Subject: [PATCH] target/i386: add a ucode-rev property + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-3-pbonzini@redhat.com> +Patchwork-id: 93909 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 2/6] target/i386: add a ucode-rev property +Bugzilla: 1791648 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Add the property and plumb it in TCG and HVF (the latter of which +tried to support returning a constant value but used the wrong MSR). + +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-3-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4e45aff398cd1542c2a384a2a3b8600f23337d86) +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 10 ++++++++++ + target/i386/cpu.h | 3 +++ + target/i386/hvf/x86_emu.c | 4 +--- + target/i386/misc_helper.c | 4 ++++ + 4 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 35a33db39a..ec8bc9957e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6332,6 +6332,15 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + } + } + ++ if (cpu->ucode_rev == 0) { ++ /* The default is the same as KVM's. 
*/ ++ if (IS_AMD_CPU(env)) { ++ cpu->ucode_rev = 0x01000065; ++ } else { ++ cpu->ucode_rev = 0x100000000ULL; ++ } ++ } ++ + /* mwait extended info: needed for Core compatibility */ + /* We always wake on interrupt even if host does not have the capability */ + cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; +@@ -7011,6 +7020,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), + DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), + DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), ++ DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), + DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), + DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), + DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 0b57b915af..ca7de143af 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -345,6 +345,7 @@ typedef enum X86Seg { + #define MSR_IA32_SPEC_CTRL 0x48 + #define MSR_VIRT_SSBD 0xc001011f + #define MSR_IA32_PRED_CMD 0x49 ++#define MSR_IA32_UCODE_REV 0x8b + #define MSR_IA32_CORE_CAPABILITY 0xcf + #define MSR_IA32_ARCH_CAPABILITIES 0x10a + #define MSR_IA32_TSCDEADLINE 0x6e0 +@@ -1562,6 +1563,8 @@ struct X86CPU { + CPUNegativeOffsetState neg; + CPUX86State env; + ++ uint64_t ucode_rev; ++ + uint32_t hyperv_spinlock_attempts; + char *hyperv_vendor_id; + bool hyperv_synic_kvm_only; +diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c +index 1b04bd7e94..cd40520c16 100644 +--- a/target/i386/hvf/x86_emu.c ++++ b/target/i386/hvf/x86_emu.c +@@ -664,8 +664,6 @@ static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) + RIP(env) += decode->len; + } + +-#define MSR_IA32_UCODE_REV 0x00000017 +- + void simulate_rdmsr(struct CPUState *cpu) + { + X86CPU *x86_cpu = X86_CPU(cpu); +@@ -681,7 +679,7 @@ void simulate_rdmsr(struct CPUState *cpu) + val = 
cpu_get_apic_base(X86_CPU(cpu)->apic_state); + break; + case MSR_IA32_UCODE_REV: +- val = (0x100000000ULL << 32) | 0x100000000ULL; ++ val = x86_cpu->ucode_rev; + break; + case MSR_EFER: + val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); +diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c +index 3eff6885f8..aed16fe3f0 100644 +--- a/target/i386/misc_helper.c ++++ b/target/i386/misc_helper.c +@@ -229,6 +229,7 @@ void helper_rdmsr(CPUX86State *env) + #else + void helper_wrmsr(CPUX86State *env) + { ++ X86CPU *x86_cpu = env_archcpu(env); + uint64_t val; + + cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); +@@ -371,6 +372,9 @@ void helper_wrmsr(CPUX86State *env) + env->msr_bndcfgs = val; + cpu_sync_bndcs_hflags(env); + break; ++ case MSR_IA32_UCODE_REV: ++ val = x86_cpu->ucode_rev; ++ break; + default: + if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL + && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + +-- +2.27.0 + diff --git a/target-i386-add-two-missing-VMX-features-for-Skylake.patch b/target-i386-add-two-missing-VMX-features-for-Skylake.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa3c6d8f39ad28cb75995e142697bee2bf48e97a --- /dev/null +++ b/target-i386-add-two-missing-VMX-features-for-Skylake.patch @@ -0,0 +1,42 @@ +From 1faa48f4de44c123143d43e67cd5a478628a45a4 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 25 Nov 2019 19:12:16 +0100 +Subject: [PATCH] target/i386: add two missing VMX features for Skylake and + CascadeLake Server + +They are present in client (Core) Skylake but pasted wrong into the server +SKUs. + +Reported-by: Dr. 
David Alan Gilbert +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 2f32d67aa5..6f27a5170a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -2997,7 +2997,8 @@ static X86CPUDefinition builtin_x86_defs[] = { + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | +- VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (Skylake)", + .versions = (X86CPUVersionDefinition[]) { +@@ -3113,7 +3114,8 @@ static X86CPUDefinition builtin_x86_defs[] = { + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | +- VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, + .xlevel = 0x80000008, + .model_id = "Intel Xeon Processor (Cascadelake)", + .versions = (X86CPUVersionDefinition[]) { +-- +2.27.0 + diff --git a/target-i386-check-for-availability-of-MSR_IA32_UCODE.patch b/target-i386-check-for-availability-of-MSR_IA32_UCODE.patch new file mode 100644 index 0000000000000000000000000000000000000000..377226a4cbff7b6a4c83d410f8f78ff18bc97190 --- /dev/null +++ b/target-i386-check-for-availability-of-MSR_IA32_UCODE.patch @@ -0,0 +1,58 @@ +From 0633e7684b4f4da858a3739d68cb57a1d49bdf01 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 11 Feb 2020 18:55:16 +0100 +Subject: [PATCH] target/i386: check for availability of MSR_IA32_UCODE_REV as + an emulated MSR + +Even though 
MSR_IA32_UCODE_REV has been available long before Linux 5.6, +which added it to the emulated MSR list, a bug caused the microcode +version to revert to 0x100000000 on INIT. As a result, processors other +than the bootstrap processor would not see the host microcode revision; +some Windows version complain loudly about this and crash with a +fairly explicit MICROCODE REVISION MISMATCH error. + +[If running 5.6 prereleases, the kernel fix "KVM: x86: do not reset + microcode version on INIT or RESET" should also be applied.] + +Reported-by: Alex Williamson +Message-id: <20200211175516.10716-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 7437f86130..e49a2d2585 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -99,6 +99,7 @@ static bool has_msr_smi_count; + static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; + static bool has_msr_vmx_vmfunc; ++static bool has_msr_ucode_rev; + static bool has_msr_vmx_procbased_ctls2; + + static uint32_t has_architectural_pmu_version; +@@ -1985,6 +1986,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_VMX_VMFUNC: + has_msr_vmx_vmfunc = true; + break; ++ case MSR_IA32_UCODE_REV: ++ has_msr_ucode_rev = true; ++ break; + case MSR_IA32_VMX_PROCBASED_CTLS2: + has_msr_vmx_procbased_ctls2 = true; + break; +@@ -2628,8 +2632,7 @@ static void kvm_init_msrs(X86CPU *cpu) + env->features[FEAT_CORE_CAPABILITY]); + } + +- if (kvm_arch_get_supported_msr_feature(kvm_state, +- MSR_IA32_UCODE_REV)) { ++ if (has_msr_ucode_rev) { + kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); + } + +-- +2.27.0 + diff --git a/target-i386-disable-VMX-features-if-nested-0.patch b/target-i386-disable-VMX-features-if-nested-0.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa7edfdb2a7296a0039a44e2c4c1af9b5b324951 --- /dev/null +++ 
b/target-i386-disable-VMX-features-if-nested-0.patch @@ -0,0 +1,43 @@ +From 26f01427d155510edcab07e312a72f5bacddafb2 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Fri, 6 Dec 2019 15:11:11 +0800 +Subject: [PATCH] target/i386: disable VMX features if nested=0 + +If kvm does not support VMX feature by nested=0, the kvm_vmx_basic +can't get the right value from MSR_IA32_VMX_BASIC register, which +make qemu coredump when qemu do KVM_SET_MSRS. + +The coredump info: +error: failed to set MSR 0x480 to 0x0 +kvm_put_msrs: Assertion `ret == cpu->kvm_msr_buf->nmsrs' failed. + +Signed-off-by: Yang Zhong +Message-Id: <20191206071111.12128-1-yang.zhong@intel.com> +Reported-by: Catherine Ho +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index b97f40df6b..5ee0c50d7c 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -2493,6 +2493,14 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) + uint64_t kvm_vmx_basic = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_VMX_BASIC); ++ ++ if (!kvm_vmx_basic) { ++ /* If the kernel doesn't support VMX feature (kvm_intel.nested=0), ++ * then kvm_vmx_basic will be 0 and KVM_SET_MSR will fail. 
++ */ ++ return; ++ } ++ + uint64_t kvm_vmx_misc = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_VMX_MISC); +-- +2.27.0 + diff --git a/target-i386-do-not-set-unsupported-VMX-secondary-exe.patch b/target-i386-do-not-set-unsupported-VMX-secondary-exe.patch new file mode 100644 index 0000000000000000000000000000000000000000..8eda458156b202ba7c5405bf1e261e56a7aa1771 --- /dev/null +++ b/target-i386-do-not-set-unsupported-VMX-secondary-exe.patch @@ -0,0 +1,102 @@ +From 472ccc3e48cab962ec9acf3f31e4467544b51705 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Tue, 31 Mar 2020 18:27:52 +0200 +Subject: [PATCH] target/i386: do not set unsupported VMX secondary execution + controls + +Commit 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for +secondary execution controls") added a workaround for KVM pre-dating +commit 6defc591846d ("KVM: nVMX: include conditional controls in /dev/kvm +KVM_GET_MSRS") which wasn't setting certain available controls. The +workaround uses generic CPUID feature bits to set missing VMX controls. + +It was found that in some cases it is possible to observe hosts which +have certain CPUID features but lack the corresponding VMX control. + +In particular, it was reported that Azure VMs have RDSEED but lack +VMX_SECONDARY_EXEC_RDSEED_EXITING; attempts to enable this feature +bit result in QEMU abort. + +Resolve the issue but not applying the workaround when we don't have +to. As there is no good way to find out if KVM has the fix itself, use +95c5c7c77c ("KVM: nVMX: list VMX MSRs in KVM_GET_MSR_INDEX_LIST") instead +as these [are supposed to] come together. 
+ +Fixes: 048c95163b4 ("target/i386: work around KVM_GET_MSRS bug for secondary execution controls") +Suggested-by: Paolo Bonzini +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20200331162752.1209928-1-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm.c | 41 ++++++++++++++++++++++++++--------------- + 1 file changed, 26 insertions(+), 15 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 5ee0c50d7c..7328746d92 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -97,6 +97,7 @@ static bool has_msr_smi_count; + static bool has_msr_arch_capabs; + static bool has_msr_core_capabs; + static bool has_msr_vmx_vmfunc; ++static bool has_msr_vmx_procbased_ctls2; + + static uint32_t has_architectural_pmu_version; + static uint32_t num_architectural_pmu_gp_counters; +@@ -474,21 +475,28 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) + value = msr_data.entries[0].data; + switch (index) { + case MSR_IA32_VMX_PROCBASED_CTLS2: +- /* KVM forgot to add these bits for some time, do this ourselves. */ +- if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; +- } +- if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { +- value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ if (!has_msr_vmx_procbased_ctls2) { ++ /* KVM forgot to add these bits for some time, do this ourselves. 
*/ ++ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & ++ CPUID_XSAVE_XSAVES) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & ++ CPUID_EXT_RDRAND) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & ++ CPUID_7_0_EBX_INVPCID) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & ++ CPUID_7_0_EBX_RDSEED) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & ++ CPUID_EXT2_RDTSCP) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ } + } + /* fall through */ + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: +@@ -1973,6 +1981,9 @@ static int kvm_get_supported_msrs(KVMState *s) + case MSR_IA32_VMX_VMFUNC: + has_msr_vmx_vmfunc = true; + break; ++ case MSR_IA32_VMX_PROCBASED_CTLS2: ++ has_msr_vmx_procbased_ctls2 = true; ++ break; + } + } + } +-- +2.27.0 + diff --git a/target-i386-enable-monitor-and-ucode-revision-with-c.patch b/target-i386-enable-monitor-and-ucode-revision-with-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..398a79d1648aa2d595ad39098a49c00b7b8ab95a --- /dev/null +++ b/target-i386-enable-monitor-and-ucode-revision-with-c.patch @@ -0,0 +1,48 @@ +From 8470399d9508b3b56d625866ea235c2a5b4cb39a Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:16 +0000 +Subject: [PATCH] target/i386: enable monitor and ucode revision with -cpu max +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-7-pbonzini@redhat.com> +Patchwork-id: 93910 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 6/6] target/i386: enable monitor and ucode revision with -cpu max +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky 
+RH-Acked-by: Dr. David Alan Gilbert + +These two features were incorrectly tied to host_cpuid_required rather than +cpu->max_features. As a result, -cpu max was not enabling either MONITOR +features or ucode revision. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit be02cda3afde60d219786e23c3f8edb53aec8e17) + +[RHEL7: context, upstream uses g_autofree] + +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 22e0e89718..6147cd419a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6317,7 +6317,9 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + g_free(name); + goto out; + } ++ } + ++ if (cpu->max_features && accel_uses_host_cpuid()) { + if (enable_cpu_pm) { + host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, + &cpu->mwait.ecx, &cpu->mwait.edx); +-- +2.27.0 + diff --git a/target-i386-expand-feature-words-to-64-bits.patch b/target-i386-expand-feature-words-to-64-bits.patch new file mode 100644 index 0000000000000000000000000000000000000000..e4a06e5954aec3ec8d30e29c61234612ed36d0c7 --- /dev/null +++ b/target-i386-expand-feature-words-to-64-bits.patch @@ -0,0 +1,295 @@ +From bec2d75a3d3c6405d0afe59c343d23199b009666 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 1 Jul 2019 17:38:54 +0200 +Subject: [PATCH] target/i386: expand feature words to 64 bits + +VMX requires 64-bit feature words for the IA32_VMX_EPT_VPID_CAP +and IA32_VMX_BASIC MSRs. (The VMX control MSRs are 64-bit wide but +actually have only 32 bits of information). 
+ +Signed-off-by: Paolo Bonzini +--- + include/sysemu/kvm.h | 2 +- + target/i386/cpu.c | 71 +++++++++++++++++++++++--------------------- + target/i386/cpu.h | 2 +- + target/i386/kvm.c | 2 +- + 4 files changed, 40 insertions(+), 37 deletions(-) + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 565adb4e2c..875b2bf10d 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -464,7 +464,7 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension); + + uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, + uint32_t index, int reg); +-uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); ++uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); + + + void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index d4a435ba96..3d6541c4a8 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -789,7 +789,7 @@ typedef struct FeatureWordInfo { + * In cases of disagreement between feature naming conventions, + * aliases may be added. 
+ */ +- const char *feat_names[32]; ++ const char *feat_names[64]; + union { + /* If type==CPUID_FEATURE_WORD */ + struct { +@@ -803,11 +803,11 @@ typedef struct FeatureWordInfo { + uint32_t index; + } msr; + }; +- uint32_t tcg_features; /* Feature flags supported by TCG */ +- uint32_t unmigratable_flags; /* Feature flags known to be unmigratable */ +- uint32_t migratable_flags; /* Feature flags known to be migratable */ ++ uint64_t tcg_features; /* Feature flags supported by TCG */ ++ uint64_t unmigratable_flags; /* Feature flags known to be unmigratable */ ++ uint64_t migratable_flags; /* Feature flags known to be migratable */ + /* Features that shouldn't be auto-enabled by "-cpu host" */ +- uint32_t no_autoenable_flags; ++ uint64_t no_autoenable_flags; + } FeatureWordInfo; + + static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { +@@ -1236,7 +1236,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + + typedef struct FeatureMask { + FeatureWord index; +- uint32_t mask; ++ uint64_t mask; + } FeatureMask; + + typedef struct FeatureDep { +@@ -1246,11 +1246,11 @@ typedef struct FeatureDep { + static FeatureDep feature_dependencies[] = { + { + .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, +- .to = { FEAT_ARCH_CAPABILITIES, ~0u }, ++ .to = { FEAT_ARCH_CAPABILITIES, ~0ull }, + }, + { + .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, +- .to = { FEAT_CORE_CAPABILITY, ~0u }, ++ .to = { FEAT_CORE_CAPABILITY, ~0ull }, + }, + }; + +@@ -1362,14 +1362,14 @@ const char *get_register_name_32(unsigned int reg) + * Returns the set of feature flags that are supported and migratable by + * QEMU, for a given FeatureWord. 
+ */ +-static uint32_t x86_cpu_get_migratable_flags(FeatureWord w) ++static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) + { + FeatureWordInfo *wi = &feature_word_info[w]; +- uint32_t r = 0; ++ uint64_t r = 0; + int i; + +- for (i = 0; i < 32; i++) { +- uint32_t f = 1U << i; ++ for (i = 0; i < 64; i++) { ++ uint64_t f = 1ULL << i; + + /* If the feature name is known, it is implicitly considered migratable, + * unless it is explicitly set in unmigratable_flags */ +@@ -3051,7 +3051,7 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value) + assert(pv->prop); + } + +-static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, ++static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + bool migratable_only); + + static bool lmce_supported(void) +@@ -3237,7 +3237,7 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) + return false; + } + +-static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, ++static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) + { + CPUX86State *env = &cpu->env; +@@ -3254,8 +3254,8 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, + return; + } + +- for (i = 0; i < 32; ++i) { +- if ((1UL << i) & mask) { ++ for (i = 0; i < 64; ++i) { ++ if ((1ULL << i) & mask) { + feat_word_str = feature_word_description(f, i); + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, +@@ -3498,7 +3498,7 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) + { +- uint32_t *array = (uint32_t *)opaque; ++ uint64_t *array = (uint64_t *)opaque; + FeatureWord w; + X86CPUFeatureWordInfo word_infos[FEATURE_WORDS] = { }; + X86CPUFeatureWordInfoList list_entries[FEATURE_WORDS] = { }; +@@ -3542,6 +3542,7 @@ static inline void feat2prop(char *s) + /* Return the feature property name for a feature flag bit */ + static const char 
*x86_cpu_feature_name(FeatureWord w, int bitnr) + { ++ const char *name; + /* XSAVE components are automatically enabled by other features, + * so return the original feature name instead + */ +@@ -3555,9 +3556,11 @@ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) + } + } + +- assert(bitnr < 32); ++ assert(bitnr < 64); + assert(w < FEATURE_WORDS); +- return feature_word_info[w].feat_names[bitnr]; ++ name = feature_word_info[w].feat_names[bitnr]; ++ assert(bitnr < 32 || !(name && feature_word_info[w].type == CPUID_FEATURE_WORD)); ++ return name; + } + + /* Compatibily hack to maintain legacy +-feat semantic, +@@ -3673,10 +3676,10 @@ static void x86_cpu_list_feature_names(FeatureWordArray features, + strList **next = feat_names; + + for (w = 0; w < FEATURE_WORDS; w++) { +- uint32_t filtered = features[w]; ++ uint64_t filtered = features[w]; + int i; +- for (i = 0; i < 32; i++) { +- if (filtered & (1UL << i)) { ++ for (i = 0; i < 64; i++) { ++ if (filtered & (1ULL << i)) { + strList *new = g_new0(strList, 1); + new->value = g_strdup(x86_cpu_feature_name(w, i)); + *next = new; +@@ -3845,7 +3848,7 @@ void x86_cpu_list(void) + names = NULL; + for (i = 0; i < ARRAY_SIZE(feature_word_info); i++) { + FeatureWordInfo *fw = &feature_word_info[i]; +- for (j = 0; j < 32; j++) { ++ for (j = 0; j < 64; j++) { + if (fw->feat_names[j]) { + names = g_list_append(names, (gpointer)fw->feat_names[j]); + } +@@ -3900,11 +3903,11 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) + return cpu_list; + } + +-static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, ++static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + bool migratable_only) + { + FeatureWordInfo *wi = &feature_word_info[w]; +- uint32_t r = 0; ++ uint64_t r = 0; + + if (kvm_enabled()) { + switch (wi->type) { +@@ -4075,7 +4078,7 @@ static QDict *x86_cpu_static_props(void) + for (w = 0; w < FEATURE_WORDS; w++) { + FeatureWordInfo *fi = &feature_word_info[w]; + int bit; +- 
for (bit = 0; bit < 32; bit++) { ++ for (bit = 0; bit < 64; bit++) { + if (!fi->feat_names[bit]) { + continue; + } +@@ -5231,7 +5234,7 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { + FeatureDep *d = &feature_dependencies[i]; + if (!(env->features[d->from.index] & d->from.mask)) { +- uint32_t unavailable_features = env->features[d->to.index] & d->to.mask; ++ uint64_t unavailable_features = env->features[d->to.index] & d->to.mask; + + /* Not an error unless the dependent feature was added explicitly. */ + mark_unavailable_features(cpu, d->to.index, +@@ -5326,10 +5329,10 @@ static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) + } + + for (w = 0; w < FEATURE_WORDS; w++) { +- uint32_t host_feat = ++ uint64_t host_feat = + x86_cpu_get_supported_feature_word(w, false); +- uint32_t requested_features = env->features[w]; +- uint32_t unavailable_features = requested_features & ~host_feat; ++ uint64_t requested_features = env->features[w]; ++ uint64_t unavailable_features = requested_features & ~host_feat; + mark_unavailable_features(cpu, w, unavailable_features, prefix); + } + +@@ -5626,7 +5629,7 @@ static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) + + typedef struct BitProperty { + FeatureWord w; +- uint32_t mask; ++ uint64_t mask; + } BitProperty; + + static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, +@@ -5634,7 +5637,7 @@ static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, + { + X86CPU *cpu = X86_CPU(obj); + BitProperty *fp = opaque; +- uint32_t f = cpu->env.features[fp->w]; ++ uint64_t f = cpu->env.features[fp->w]; + bool value = (f & fp->mask) == fp->mask; + visit_type_bool(v, name, &value, errp); + } +@@ -5687,7 +5690,7 @@ static void x86_cpu_register_bit_prop(X86CPU *cpu, + { + BitProperty *fp; + ObjectProperty *op; +- uint32_t mask = (1UL << bitnr); ++ uint64_t mask = (1ULL << bitnr); + + op = 
object_property_find(OBJECT(cpu), prop_name, NULL); + if (op) { +@@ -5821,7 +5824,7 @@ static void x86_cpu_initfn(Object *obj) + for (w = 0; w < FEATURE_WORDS; w++) { + int bitnr; + +- for (bitnr = 0; bitnr < 32; bitnr++) { ++ for (bitnr = 0; bitnr < 64; bitnr++) { + x86_cpu_register_feature_bit_props(cpu, w, bitnr); + } + } +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 24d489db0f..9a105b2251 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -502,7 +502,7 @@ typedef enum FeatureWord { + FEATURE_WORDS, + } FeatureWord; + +-typedef uint32_t FeatureWordArray[FEATURE_WORDS]; ++typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + + /* cpuid_features bits */ + #define CPUID_FP87 (1U << 0) +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index f55d4b4b97..e9a6293ab2 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -437,7 +437,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + return ret; + } + +-uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) ++uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) + { + struct { + struct kvm_msrs info; +-- +2.27.0 + diff --git a/target-i386-handle-filtered_features-in-a-new-functi.patch b/target-i386-handle-filtered_features-in-a-new-functi.patch new file mode 100644 index 0000000000000000000000000000000000000000..ba35948dd1e1c10f566c327750026c22b626a5f2 --- /dev/null +++ b/target-i386-handle-filtered_features-in-a-new-functi.patch @@ -0,0 +1,176 @@ +From b9d29966103ca671718ef1eb5b68067b05fad340 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 2 Jul 2019 15:32:41 +0200 +Subject: [PATCH] target/i386: handle filtered_features in a new function + mark_unavailable_features + +The next patch will add a different reason for filtering features, unrelated +to host feature support. Extract a new function that takes care of disabling +the features and optionally reporting them. 
+ +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 87 ++++++++++++++++++++++++++--------------------- + 1 file changed, 48 insertions(+), 39 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index e65f372f25..8798cafc7a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3216,17 +3216,41 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) + return NULL; + } + +-static void report_unavailable_features(FeatureWord w, uint32_t mask) ++static bool x86_cpu_have_filtered_features(X86CPU *cpu) + { ++ FeatureWord w; ++ ++ for (w = 0; w < FEATURE_WORDS; w++) { ++ if (cpu->filtered_features[w]) { ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, ++ const char *verbose_prefix) ++{ ++ CPUX86State *env = &cpu->env; + FeatureWordInfo *f = &feature_word_info[w]; + int i; + char *feat_word_str; + ++ if (!cpu->force_features) { ++ env->features[w] &= ~mask; ++ } ++ cpu->filtered_features[w] |= mask; ++ ++ if (!verbose_prefix) { ++ return; ++ } ++ + for (i = 0; i < 32; ++i) { + if ((1UL << i) & mask) { + feat_word_str = feature_word_description(f, i); +- warn_report("%s doesn't support requested feature: %s%s%s [bit %d]", +- accel_uses_host_cpuid() ? "host" : "TCG", ++ warn_report("%s: %s%s%s [bit %d]", ++ verbose_prefix, + feat_word_str, + f->feat_names[i] ? "." : "", + f->feat_names[i] ? 
f->feat_names[i] : "", i); +@@ -3631,7 +3655,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, + } + + static void x86_cpu_expand_features(X86CPU *cpu, Error **errp); +-static int x86_cpu_filter_features(X86CPU *cpu); ++static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); + + /* Build a list with the name of all features on a feature word array */ + static void x86_cpu_list_feature_names(FeatureWordArray features, +@@ -3696,7 +3720,7 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, + next = &new->next; + } + +- x86_cpu_filter_features(xc); ++ x86_cpu_filter_features(xc, false); + + x86_cpu_list_feature_names(xc->filtered_features, next); + +@@ -3904,15 +3928,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, + return r; + } + +-static void x86_cpu_report_filtered_features(X86CPU *cpu) +-{ +- FeatureWord w; +- +- for (w = 0; w < FEATURE_WORDS; w++) { +- report_unavailable_features(w, cpu->filtered_features[w]); +- } +-} +- + static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) + { + PropValue *pv; +@@ -5274,24 +5289,24 @@ out: + * + * Returns: 0 if all flags are supported by the host, non-zero otherwise. + */ +-static int x86_cpu_filter_features(X86CPU *cpu) ++static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) + { + CPUX86State *env = &cpu->env; + FeatureWord w; +- int rv = 0; ++ const char *prefix = NULL; ++ ++ if (verbose) { ++ prefix = accel_uses_host_cpuid() ++ ? 
"host doesn't support requested feature" ++ : "TCG doesn't support requested feature"; ++ } + + for (w = 0; w < FEATURE_WORDS; w++) { + uint32_t host_feat = + x86_cpu_get_supported_feature_word(w, false); + uint32_t requested_features = env->features[w]; +- uint32_t available_features = requested_features & host_feat; +- if (!cpu->force_features) { +- env->features[w] = available_features; +- } +- cpu->filtered_features[w] = requested_features & ~available_features; +- if (cpu->filtered_features[w]) { +- rv = 1; +- } ++ uint32_t unavailable_features = requested_features & ~host_feat; ++ mark_unavailable_features(cpu, w, unavailable_features, prefix); + } + + if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && +@@ -5317,13 +5332,9 @@ static int x86_cpu_filter_features(X86CPU *cpu) + * host can't emulate the capabilities we report on + * cpu_x86_cpuid(), intel-pt can't be enabled on the current host. + */ +- env->features[FEAT_7_0_EBX] &= ~CPUID_7_0_EBX_INTEL_PT; +- cpu->filtered_features[FEAT_7_0_EBX] |= CPUID_7_0_EBX_INTEL_PT; +- rv = 1; ++ mark_unavailable_features(cpu, FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT, prefix); + } + } +- +- return rv; + } + + static void x86_cpu_realizefn(DeviceState *dev, Error **errp) +@@ -5364,16 +5375,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + goto out; + } + +- if (x86_cpu_filter_features(cpu) && +- (cpu->check_cpuid || cpu->enforce_cpuid)) { +- x86_cpu_report_filtered_features(cpu); +- if (cpu->enforce_cpuid) { +- error_setg(&local_err, +- accel_uses_host_cpuid() ? +- "Host doesn't support requested features" : +- "TCG doesn't support requested features"); +- goto out; +- } ++ x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid); ++ ++ if (cpu->enforce_cpuid && x86_cpu_have_filtered_features(cpu)) { ++ error_setg(&local_err, ++ accel_uses_host_cpuid() ? 
++ "Host doesn't support requested features" : ++ "TCG doesn't support requested features"); ++ goto out; + } + + /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on +-- +2.27.0 + diff --git a/target-i386-introduce-generic-feature-dependency-mec.patch b/target-i386-introduce-generic-feature-dependency-mec.patch new file mode 100644 index 0000000000000000000000000000000000000000..da374c58652d5559993c9a584d7c83377d6669cd --- /dev/null +++ b/target-i386-introduce-generic-feature-dependency-mec.patch @@ -0,0 +1,146 @@ +From ed8fa9d895a0e06434b4163405aeaacbe65bcf44 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 1 Jul 2019 17:26:45 +0200 +Subject: [PATCH] target/i386: introduce generic feature dependency mechanism + +Sometimes a CPU feature does not make sense unless another is +present. In the case of VMX features, KVM does not even allow +setting the VMX controls to some invalid combinations. + +Therefore, this patch adds a generic mechanism that looks for bits +that the user explicitly cleared, and uses them to remove other bits +from the expanded CPU definition. If these dependent bits were also +explicitly *set* by the user, this will be a warning for "-cpu check" +and an error for "-cpu enforce". If not, then the dependent bits are +cleared silently, for convenience. + +With VMX features, this will be used so that for example +"-cpu host,-rdrand" will also hide support for RDRAND exiting. 
+ +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 72 +++++++++++++++++++++++++++++++---------------- + 1 file changed, 48 insertions(+), 24 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 8798cafc7a..d4a435ba96 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -801,10 +801,6 @@ typedef struct FeatureWordInfo { + /* If type==MSR_FEATURE_WORD */ + struct { + uint32_t index; +- struct { /*CPUID that enumerate this MSR*/ +- FeatureWord cpuid_class; +- uint32_t cpuid_flag; +- } cpuid_dep; + } msr; + }; + uint32_t tcg_features; /* Feature flags supported by TCG */ +@@ -1218,10 +1214,6 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + }, + .msr = { + .index = MSR_IA32_ARCH_CAPABILITIES, +- .cpuid_dep = { +- FEAT_7_0_EDX, +- CPUID_7_0_EDX_ARCH_CAPABILITIES +- } + }, + }, + [FEAT_CORE_CAPABILITY] = { +@@ -1238,14 +1230,30 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + }, + .msr = { + .index = MSR_IA32_CORE_CAPABILITY, +- .cpuid_dep = { +- FEAT_7_0_EDX, +- CPUID_7_0_EDX_CORE_CAPABILITY, +- }, + }, + }, + }; + ++typedef struct FeatureMask { ++ FeatureWord index; ++ uint32_t mask; ++} FeatureMask; ++ ++typedef struct FeatureDep { ++ FeatureMask from, to; ++} FeatureDep; ++ ++static FeatureDep feature_dependencies[] = { ++ { ++ .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, ++ .to = { FEAT_ARCH_CAPABILITIES, ~0u }, ++ }, ++ { ++ .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, ++ .to = { FEAT_CORE_CAPABILITY, ~0u }, ++ }, ++}; ++ + typedef struct X86RegisterInfo32 { + /* Name of register */ + const char *name; +@@ -5183,9 +5191,26 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + { + CPUX86State *env = &cpu->env; + FeatureWord w; ++ int i; + GList *l; + Error *local_err = NULL; + ++ for (l = plus_features; l; l = l->next) { ++ const char *prop = l->data; ++ object_property_set_bool(OBJECT(cpu), true, prop, &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } 
++ ++ for (l = minus_features; l; l = l->next) { ++ const char *prop = l->data; ++ object_property_set_bool(OBJECT(cpu), false, prop, &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ + /*TODO: Now cpu->max_features doesn't overwrite features + * set using QOM properties, and we can convert + * plus_features & minus_features to global properties +@@ -5203,19 +5228,18 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + } + } + +- for (l = plus_features; l; l = l->next) { +- const char *prop = l->data; +- object_property_set_bool(OBJECT(cpu), true, prop, &local_err); +- if (local_err) { +- goto out; +- } +- } ++ for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { ++ FeatureDep *d = &feature_dependencies[i]; ++ if (!(env->features[d->from.index] & d->from.mask)) { ++ uint32_t unavailable_features = env->features[d->to.index] & d->to.mask; + +- for (l = minus_features; l; l = l->next) { +- const char *prop = l->data; +- object_property_set_bool(OBJECT(cpu), false, prop, &local_err); +- if (local_err) { +- goto out; ++ /* Not an error unless the dependent feature was added explicitly. 
*/ ++ mark_unavailable_features(cpu, d->to.index, ++ unavailable_features & env->user_features[d->to.index], ++ "This feature depends on other features that were not requested"); ++ ++ env->user_features[d->to.index] |= unavailable_features; ++ env->features[d->to.index] &= ~unavailable_features; + } + } + +-- +2.27.0 + diff --git a/target-i386-kvm-initialize-feature-MSRs-very-early.patch b/target-i386-kvm-initialize-feature-MSRs-very-early.patch new file mode 100644 index 0000000000000000000000000000000000000000..90b6f6fa4ba76fb4dde12567fcf3aee236236bde --- /dev/null +++ b/target-i386-kvm-initialize-feature-MSRs-very-early.patch @@ -0,0 +1,178 @@ +From c222711e37196e4be1776a084a1acb3c5a1f7283 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 17 Feb 2020 16:23:11 +0000 +Subject: [PATCH] target/i386: kvm: initialize feature MSRs very early +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +Message-id: <20200217162316.2464-2-pbonzini@redhat.com> +Patchwork-id: 93899 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 1/6] target/i386: kvm: initialize feature MSRs very early +Bugzilla: 1791648 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Maxim Levitsky +RH-Acked-by: Dr. David Alan Gilbert + +Some read-only MSRs affect the behavior of ioctls such as +KVM_SET_NESTED_STATE. We can initialize them once and for all +right after the CPU is realized, since they will never be modified +by the guest. + +Reported-by: Qingua Cheng +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-2-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 420ae1fc51c99abfd03b1c590f55617edd2a2bed) +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/kvm.c | 81 +++++++++++++++++++++++++----------------- + target/i386/kvm_i386.h | 1 + + 2 files changed, 49 insertions(+), 33 deletions(-) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 7328746d92..60060087fd 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -63,6 +63,8 @@ + * 255 kvm_msr_entry structs */ + #define MSR_BUF_SIZE 4096 + ++static void kvm_init_msrs(X86CPU *cpu); ++ + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(SET_TSS_ADDR), + KVM_CAP_INFO(EXT_CPUID), +@@ -1777,6 +1779,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + has_msr_tsc_aux = false; + } + ++ kvm_init_msrs(cpu); ++ + r = hyperv_init_vcpu(cpu); + if (r) { + goto fail; +@@ -2592,11 +2596,53 @@ static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) + VMCS12_MAX_FIELD_INDEX << 1); + } + ++static int kvm_buf_set_msrs(X86CPU *cpu) ++{ ++ int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if (ret < cpu->kvm_msr_buf->nmsrs) { ++ struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; ++ error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, ++ (uint32_t)e->index, (uint64_t)e->data); ++ } ++ ++ assert(ret == cpu->kvm_msr_buf->nmsrs); ++ return 0; ++} ++ ++static void kvm_init_msrs(X86CPU *cpu) ++{ ++ CPUX86State *env = &cpu->env; ++ ++ kvm_msr_buf_reset(cpu); ++ if (has_msr_arch_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, ++ env->features[FEAT_ARCH_CAPABILITIES]); ++ } ++ ++ if (has_msr_core_capabs) { ++ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, ++ env->features[FEAT_CORE_CAPABILITY]); ++ } ++ ++ /* ++ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but ++ * all kernels with MSR features should have them. 
++ */ ++ if (kvm_feature_msrs && cpu_has_vmx(env)) { ++ kvm_msr_entry_add_vmx(cpu, env->features); ++ } ++ ++ assert(kvm_buf_set_msrs(cpu) == 0); ++} ++ + static int kvm_put_msrs(X86CPU *cpu, int level) + { + CPUX86State *env = &cpu->env; + int i; +- int ret; + + kvm_msr_buf_reset(cpu); + +@@ -2648,17 +2694,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + #endif + +- /* If host supports feature MSR, write down. */ +- if (has_msr_arch_capabs) { +- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, +- env->features[FEAT_ARCH_CAPABILITIES]); +- } +- +- if (has_msr_core_capabs) { +- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, +- env->features[FEAT_CORE_CAPABILITY]); +- } +- + /* + * The following MSRs have side effects on the guest or are too heavy + * for normal writeback. Limit them to reset or full state updates. +@@ -2831,14 +2866,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ +- +- /* +- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but +- * all kernels with MSR features should have them. 
+- */ +- if (kvm_feature_msrs && cpu_has_vmx(env)) { +- kvm_msr_entry_add_vmx(cpu, env->features); +- } + } + + if (env->mcg_cap) { +@@ -2854,19 +2881,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + } + } + +- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); +- if (ret < 0) { +- return ret; +- } +- +- if (ret < cpu->kvm_msr_buf->nmsrs) { +- struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; +- error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, +- (uint32_t)e->index, (uint64_t)e->data); +- } +- +- assert(ret == cpu->kvm_msr_buf->nmsrs); +- return 0; ++ return kvm_buf_set_msrs(cpu); + } + + +diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h +index 06fe06bdb3..d98c6f69d0 100644 +--- a/target/i386/kvm_i386.h ++++ b/target/i386/kvm_i386.h +@@ -66,4 +66,5 @@ bool kvm_enable_x2apic(void); + bool kvm_has_x2apic_api(void); + + bool kvm_hv_vpindex_settable(void); ++ + #endif +-- +2.27.0 + diff --git a/target-i386-kvm-initialize-microcode-revision-from-K.patch b/target-i386-kvm-initialize-microcode-revision-from-K.patch new file mode 100644 index 0000000000000000000000000000000000000000..5c15a47a5c025cc76e9f0fb2d9ade102a6cee294 --- /dev/null +++ b/target-i386-kvm-initialize-microcode-revision-from-K.patch @@ -0,0 +1,50 @@ +From 8664cd20e4cdb8594076a26dacef592a4b4816b2 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 20 Jan 2020 19:21:44 +0100 +Subject: [PATCH] target/i386: kvm: initialize microcode revision from KVM + +KVM can return the host microcode revision as a feature MSR. +Use it as the default value for -cpu host. 
+ +Signed-off-by: Paolo Bonzini +Message-Id: <1579544504-3616-4-git-send-email-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 4 ++++ + target/i386/kvm.c | 5 +++++ + 2 files changed, 9 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index ec8bc9957e..1962f00c77 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6330,6 +6330,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + &cpu->mwait.ecx, &cpu->mwait.edx); + env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; + } ++ if (kvm_enabled() && cpu->ucode_rev == 0) { ++ cpu->ucode_rev = kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_UCODE_REV); ++ } + } + + if (cpu->ucode_rev == 0) { +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index 60060087fd..7437f86130 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -2628,6 +2628,11 @@ static void kvm_init_msrs(X86CPU *cpu) + env->features[FEAT_CORE_CAPABILITY]); + } + ++ if (kvm_arch_get_supported_msr_feature(kvm_state, ++ MSR_IA32_UCODE_REV)) { ++ kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); ++ } ++ + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. 
+-- +2.27.0 + diff --git a/target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch b/target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch new file mode 100644 index 0000000000000000000000000000000000000000..462768e0318844720a338f553914e6bfbcdb0c8c --- /dev/null +++ b/target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch @@ -0,0 +1,68 @@ +From 3b172cd5a6e62be725c778b8397310462fe0a890 Mon Sep 17 00:00:00 2001 +From: "plai@redhat.com" +Date: Thu, 7 May 2020 22:09:23 +0100 +Subject: [PATCH] target/i386: set the CPUID level to 0x14 on old machine-type + +RH-Author: plai@redhat.com +Message-id: <20200507220923.13723-1-plai@redhat.com> +Patchwork-id: 96347 +O-Subject: [RHEL8.2.1 AV qemu-kvm PATCH RESEND] target/i386: set the CPUID level to 0x14 on old machine-type +Bugzilla: 1513681 +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Igor Mammedov +RH-Acked-by: Danilo de Paula + +From: Luwei Kang + +BZ https://bugzilla.redhat.com/show_bug.cgi?id=1513681 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=28146304 +Branch: rhel-av-8.2.1 + +Tested on intel-icelake-y-01.ml3.eng.bos.redhat.com. + +The CPUID level need to be set to 0x14 manually on old +machine-type if Intel PT is enabled in guest. E.g. the +CPUID[0].EAX(level)=7 and CPUID[7].EBX[25](intel-pt)=1 when the +Qemu with "-machine pc-i440fx-3.1 -cpu qemu64,+intel-pt" parameter. + +Some Intel PT capabilities are exposed by leaf 0x14 and the +missing capabilities will cause some MSRs access failed. +This patch add a warning message to inform the user to extend +the CPUID level. + +Suggested-by: Eduardo Habkost +Signed-off-by: Luwei Kang +Message-Id: <1584031686-16444-1-git-send-email-luwei.kang@intel.com> +Signed-off-by: Eduardo Habkost +(cherry picked from commit ddc2fc9e4e42ebce48b088963dc7fbd1c08d5f33) +Signed-off-by: Paul Lai +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6147cd419a..35a33db39a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6206,9 +6206,14 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + x86_cpu_adjust_feat_level(cpu, FEAT_XSAVE); + + /* Intel Processor Trace requires CPUID[0x14] */ +- if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && +- kvm_enabled() && cpu->intel_pt_auto_level) { +- x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); ++ if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT)) { ++ if (cpu->intel_pt_auto_level) { ++ x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); ++ } else if (cpu->env.cpuid_min_level < 0x14) { ++ mark_unavailable_features(cpu, FEAT_7_0_EBX, ++ CPUID_7_0_EBX_INTEL_PT, ++ "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,+intel-pt,level=0x14\""); ++ } + } + + /* CPU topology with multi-dies support requires CPUID[0x1F] */ +-- +2.27.0 + diff --git a/target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch b/target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch new file mode 100644 index 0000000000000000000000000000000000000000..b4156952cf7fca7359fb38c8db086837b8ba3651 --- /dev/null +++ b/target-i386-work-around-KVM_GET_MSRS-bug-for-seconda.patch @@ -0,0 +1,49 @@ +From 70e4d278b89e04d7f9397ea25163feb6a7dbaa2d Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 2 Jul 2019 14:58:48 +0200 +Subject: [PATCH] target/i386: work around KVM_GET_MSRS bug for secondary + execution controls + +Some secondary controls are automatically enabled/disabled based on the CPUID +values that are set for the guest. However, they are still available at a +global level and therefore should be present when KVM_GET_MSRS is sent to +/dev/kvm. + +Unfortunately KVM forgot to include those, so fix that. 
+ +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index fafb9fb26d..b97f40df6b 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -474,6 +474,23 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) + value = msr_data.entries[0].data; + switch (index) { + case MSR_IA32_VMX_PROCBASED_CTLS2: ++ /* KVM forgot to add these bits for some time, do this ourselves. */ ++ if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; ++ } ++ if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { ++ value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; ++ } ++ /* fall through */ + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: +-- +2.27.0 + diff --git a/tcp_emu-Fix-oob-access.patch b/tcp_emu-Fix-oob-access.patch index 5182f54363d585efd23f0a5c236d0e9c5153215e..807dfef08e28fe33a65fede676bbb076f5d9e393 100644 --- a/tcp_emu-Fix-oob-access.patch +++ b/tcp_emu-Fix-oob-access.patch @@ -1,6 +1,6 @@ -From 0f7224535cdfec549cd43a5ae4ccde936f50ee95 Mon Sep 17 00:00:00 2001 +From 585634894f511bc1821cef54494bf2d9abc109c9 Mon Sep 17 00:00:00 2001 From: Samuel Thibault -Date: Wed, 11 Mar 2020 17:33:46 +0800 +Date: Tue, 14 Apr 2020 18:04:33 +0800 Subject: [PATCH] tcp_emu: Fix oob access The main loop only checks for one available byte, while we sometimes @@ 
-10,29 +10,28 @@ need two bytes. 1 file changed, 6 insertions(+) diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index fde9207b..4608942f 100644 +index d6dd133a..9c94c03a 100644 --- a/slirp/src/tcp_subr.c +++ b/slirp/src/tcp_subr.c -@@ -895,6 +895,9 @@ tcp_emu(struct socket *so, struct mbuf *m) - break; - - case 5: -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ +@@ -886,6 +886,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + break; + + case 5: ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ + - /* - * The difference between versions 1.0 and - * 2.0 is here. For future versions of -@@ -910,6 +913,9 @@ tcp_emu(struct socket *so, struct mbuf *m) - /* This is the field containing the port - * number that RA-player is listening to. - */ -+ if (bptr == m->m_data + m->m_len - 1) -+ return 1; /* We need two bytes */ + /* + * The difference between versions 1.0 and + * 2.0 is here. For future versions of +@@ -901,6 +904,9 @@ int tcp_emu(struct socket *so, struct mbuf *m) + /* This is the field containing the port + * number that RA-player is listening to. 
+ */ ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ + - lport = (((uint8_t*)bptr)[0] << 8) - + ((uint8_t *)bptr)[1]; - if (lport < 6970) --- -2.21.1 (Apple Git-122.3) - + lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; + if (lport < 6970) + lport += 256; /* don't know why */ +-- +2.23.0 diff --git a/tcp_emu-fix-unsafe-snprintf-usages.patch b/tcp_emu-fix-unsafe-snprintf-usages.patch index cc13154ca1449831d6a91dbacc27234af4caf0e5..2f6850a60c2fb942ecc7ef15030686d3dd94aa9c 100644 --- a/tcp_emu-fix-unsafe-snprintf-usages.patch +++ b/tcp_emu-fix-unsafe-snprintf-usages.patch @@ -1,6 +1,6 @@ -From 1db8bcc0ec91bb4374b3ffdd03da3c4ede381fb5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= -Date: Wed, 11 Mar 2020 18:52:07 +0800 +From 220a52fda279038d46c25d39a372154ff9b024d2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureauls?= +Date: Tue, 14 Apr 2020 19:06:35 +0800 Subject: [PATCH] tcp_emu: fix unsafe snprintf() usages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 @@ -28,67 +28,76 @@ Signed-off-by: default avatarMarc-André Lureau Reviewed-by: Samuel Thibault's avatarSamuel Thibault Message-Id: <20200127092414.169796-7-marcandre.lureau@redhat.com> --- - slirp/src/tcp_subr.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) + slirp/src/tcp_subr.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c -index e898fd03..88dadc76 100644 +index 019b637a..6c1b17bd 100644 --- a/slirp/src/tcp_subr.c +++ b/slirp/src/tcp_subr.c -@@ -707,7 +707,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - n4 = (laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), - "ORT %d,%d,%d,%d,%d,%d\r\n%s", - n1, n2, n3, n4, n5, n6, x==7?buff:""); - return 1; -@@ -740,7 +740,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - n4 = 
(laddr & 0xff); - - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), - "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", - n1, n2, n3, n4, n5, n6, x==7?buff:""); - -@@ -766,7 +766,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - if (m->m_data[m->m_len-1] == '\0' && lport != 0 && - (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, - htons(lport), SS_FACCEPTONCE)) != NULL) -- m->m_len = snprintf(m->m_data, M_ROOM(m), -+ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), - "%d", ntohs(so->so_fport)) + 1; - return 1; - -@@ -786,7 +786,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), - "DCC CHAT chat %lu %u%c\n", - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), 1); -@@ -797,7 +797,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), - "DCC SEND %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); -@@ -808,7 +808,7 @@ tcp_emu(struct socket *so, struct mbuf *m) - return 1; - } - m->m_len = bptr - m->m_data; /* Adjust length */ -- m->m_len += snprintf(bptr, M_FREEROOM(m), -+ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), - "DCC MOVE %s %lu %u %u%c\n", buff, - (unsigned long)ntohl(so->so_faddr.s_addr), - ntohs(so->so_fport), n1, 1); --- -2.21.1 (Apple Git-122.3) +@@ -655,8 +655,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + NTOHS(n1); + NTOHS(n2); + m_inc(m, snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); +- m->m_len = snprintf(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); +- assert(m->m_len < M_ROOM(m)); ++ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); + } else { + *eol = '\r'; + } +@@ 
-696,7 +695,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + n4 = (laddr & 0xff); + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), + "ORT %d,%d,%d,%d,%d,%d\r\n%s", n1, n2, n3, n4, + n5, n6, x == 7 ? buff : ""); + return 1; +@@ -732,7 +731,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += +- snprintf(bptr, M_FREEROOM(m), ++ slirp_fmt(bptr, M_FREEROOM(m), + "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", + n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); + +@@ -759,7 +758,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, + htons(lport), SS_FACCEPTONCE)) != NULL) + m->m_len = +- snprintf(m->m_data, M_ROOM(m), ++ slirp_fmt0(m->m_data, M_ROOM(m), + "%d", ntohs(so->so_fport)) + 1; + return 1; + +@@ -779,7 +778,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + return 1; + } + m->m_len = bptr - m->m_data; /* Adjust length */ +- m->m_len += snprintf(bptr, M_FREEROOM(m), ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), + "DCC CHAT chat %lu %u%c\n", + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), 1); +@@ -791,7 +790,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += +- snprintf(bptr, M_FREEROOM(m), ++ slirp_fmt(bptr, M_FREEROOM(m), + "DCC SEND %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); +@@ -803,7 +802,7 @@ int tcp_emu(struct socket *so, struct mbuf *m) + } + m->m_len = bptr - m->m_data; /* Adjust length */ + m->m_len += +- snprintf(bptr, M_FREEROOM(m), ++ slirp_fmt(bptr, M_FREEROOM(m), + "DCC MOVE %s %lu %u %u%c\n", buff, + (unsigned long)ntohl(so->so_faddr.s_addr), + ntohs(so->so_fport), n1, 1); +-- +2.23.0 diff --git a/test-numa-Adjust-aarch64-numa-test.patch 
b/test-numa-Adjust-aarch64-numa-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..24145937724385b1ff8dd0bd280e5e62341ad659 --- /dev/null +++ b/test-numa-Adjust-aarch64-numa-test.patch @@ -0,0 +1,58 @@ +From 3ef97cc418d1061fc0ec70098270ce2d76005cc1 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Thu, 23 Apr 2020 20:54:18 +0800 +Subject: [PATCH] test/numa: Adjust aarch64 numa test + +We have supported topology for arm/virt in previous patch, which +changes the meaning of "thread-id", so we must modify test case. + +Signed-off-by: Keqian Zhu +--- + tests/numa-test.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/tests/numa-test.c b/tests/numa-test.c +index 8de8581231..71cdd7b4f7 100644 +--- a/tests/numa-test.c ++++ b/tests/numa-test.c +@@ -231,17 +231,17 @@ static void aarch64_numa_cpu(const void *data) + QObject *e; + QTestState *qts; + +- cli = make_cli(data, "-smp 2 " ++ cli = make_cli(data, "-smp 2,cores=2 " + "-numa node,nodeid=0 -numa node,nodeid=1 " +- "-numa cpu,node-id=1,thread-id=0 " +- "-numa cpu,node-id=0,thread-id=1"); ++ "-numa cpu,node-id=1,core-id=0 " ++ "-numa cpu,node-id=0,core-id=1"); + qts = qtest_init(cli); + cpus = get_cpus(qts, &resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; +- int64_t thread, node; ++ int64_t core, node; + + cpu = qobject_to(QDict, e); + g_assert(qdict_haskey(cpu, "props")); +@@ -249,12 +249,12 @@ static void aarch64_numa_cpu(const void *data) + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); +- g_assert(qdict_haskey(props, "thread-id")); +- thread = qdict_get_int(props, "thread-id"); ++ g_assert(qdict_haskey(props, "core-id")); ++ core = qdict_get_int(props, "core-id"); + +- if (thread == 0) { ++ if (core == 0) { + g_assert_cmpint(node, ==, 1); +- } else if (thread == 1) { ++ } else if (core == 1) { + g_assert_cmpint(node, ==, 0); + } else { + g_assert(false); +-- +2.19.1 diff --git 
a/test-tpm-pass-optional-machine-options-to-swtpm-test.patch b/test-tpm-pass-optional-machine-options-to-swtpm-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..fe7fd4ac907813d676cdf0c2a713e31279c29685 --- /dev/null +++ b/test-tpm-pass-optional-machine-options-to-swtpm-test.patch @@ -0,0 +1,187 @@ +From c06a3ceacc1793bc1cfe5c2a6ed510c9aea8253d Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Thu, 13 Aug 2020 20:28:25 +0800 +Subject: [PATCH 17/19] test: tpm: pass optional machine options to swtpm test + functions + +We plan to use swtpm test functions on ARM for testing the +sysbus TPM-TIS device. However on ARM there is no default machine +type. So we need to explictly pass some machine options on startup. +Let's allow this by adding a new parameter to both swtpm test +functions and update all call sites. + +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Message-id: 20200305165149.618-9-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + tests/tpm-crb-swtpm-test.c | 5 +++-- + tests/tpm-tests.c | 10 ++++++---- + tests/tpm-tests.h | 5 +++-- + tests/tpm-tis-swtpm-test.c | 5 +++-- + tests/tpm-util.c | 8 ++++++-- + tests/tpm-util.h | 3 ++- + 6 files changed, 23 insertions(+), 13 deletions(-) + +diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c +index 2c4fb8ae..5228cb7a 100644 +--- a/tests/tpm-crb-swtpm-test.c ++++ b/tests/tpm-crb-swtpm-test.c +@@ -29,7 +29,8 @@ static void tpm_crb_swtpm_test(const void *data) + { + const TestState *ts = data; + +- tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_crb_transfer, "tpm-crb"); ++ tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_crb_transfer, ++ "tpm-crb", NULL); + } + + static void tpm_crb_swtpm_migration_test(const void *data) +@@ -37,7 +38,7 @@ static void tpm_crb_swtpm_migration_test(const void *data) + const TestState *ts = data; + + tpm_test_swtpm_migration_test(ts->src_tpm_path, ts->dst_tpm_path, ts->uri, +- 
tpm_util_crb_transfer, "tpm-crb"); ++ tpm_util_crb_transfer, "tpm-crb", NULL); + } + + int main(int argc, char **argv) +diff --git a/tests/tpm-tests.c b/tests/tpm-tests.c +index e640777a..d823bda8 100644 +--- a/tests/tpm-tests.c ++++ b/tests/tpm-tests.c +@@ -30,7 +30,7 @@ tpm_test_swtpm_skip(void) + } + + void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx, +- const char *ifmodel) ++ const char *ifmodel, const char *machine_options) + { + char *args = NULL; + QTestState *s; +@@ -47,10 +47,11 @@ void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx, + g_assert_true(succ); + + args = g_strdup_printf( ++ "%s " + "-chardev socket,id=chr,path=%s " + "-tpmdev emulator,id=dev,chardev=chr " + "-device %s,tpmdev=dev", +- addr->u.q_unix.path, ifmodel); ++ machine_options ? : "", addr->u.q_unix.path, ifmodel); + + s = qtest_start(args); + g_free(args); +@@ -78,7 +79,8 @@ void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx, + void tpm_test_swtpm_migration_test(const char *src_tpm_path, + const char *dst_tpm_path, + const char *uri, tx_func *tx, +- const char *ifmodel) ++ const char *ifmodel, ++ const char *machine_options) + { + gboolean succ; + GPid src_tpm_pid, dst_tpm_pid; +@@ -100,7 +102,7 @@ void tpm_test_swtpm_migration_test(const char *src_tpm_path, + + tpm_util_migration_start_qemu(&src_qemu, &dst_qemu, + src_tpm_addr, dst_tpm_addr, uri, +- ifmodel); ++ ifmodel, machine_options); + + tpm_util_startup(src_qemu, tx); + tpm_util_pcrextend(src_qemu, tx); +diff --git a/tests/tpm-tests.h b/tests/tpm-tests.h +index b97688fe..a5df35ab 100644 +--- a/tests/tpm-tests.h ++++ b/tests/tpm-tests.h +@@ -16,11 +16,12 @@ + #include "tpm-util.h" + + void tpm_test_swtpm_test(const char *src_tpm_path, tx_func *tx, +- const char *ifmodel); ++ const char *ifmodel, const char *machine_options); + + void tpm_test_swtpm_migration_test(const char *src_tpm_path, + const char *dst_tpm_path, + const char *uri, tx_func *tx, +- const char *ifmodel); ++ const char 
*ifmodel, ++ const char *machine_options); + + #endif /* TESTS_TPM_TESTS_H */ +diff --git a/tests/tpm-tis-swtpm-test.c b/tests/tpm-tis-swtpm-test.c +index 9f58a3a9..9470f157 100644 +--- a/tests/tpm-tis-swtpm-test.c ++++ b/tests/tpm-tis-swtpm-test.c +@@ -29,7 +29,8 @@ static void tpm_tis_swtpm_test(const void *data) + { + const TestState *ts = data; + +- tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_tis_transfer, "tpm-tis"); ++ tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_tis_transfer, ++ "tpm-tis", NULL); + } + + static void tpm_tis_swtpm_migration_test(const void *data) +@@ -37,7 +38,7 @@ static void tpm_tis_swtpm_migration_test(const void *data) + const TestState *ts = data; + + tpm_test_swtpm_migration_test(ts->src_tpm_path, ts->dst_tpm_path, ts->uri, +- tpm_util_tis_transfer, "tpm-tis"); ++ tpm_util_tis_transfer, "tpm-tis", NULL); + } + + int main(int argc, char **argv) +diff --git a/tests/tpm-util.c b/tests/tpm-util.c +index e08b1376..7ecdae2f 100644 +--- a/tests/tpm-util.c ++++ b/tests/tpm-util.c +@@ -258,23 +258,27 @@ void tpm_util_migration_start_qemu(QTestState **src_qemu, + SocketAddress *src_tpm_addr, + SocketAddress *dst_tpm_addr, + const char *miguri, +- const char *ifmodel) ++ const char *ifmodel, ++ const char *machine_options) + { + char *src_qemu_args, *dst_qemu_args; + + src_qemu_args = g_strdup_printf( ++ "%s " + "-chardev socket,id=chr,path=%s " + "-tpmdev emulator,id=dev,chardev=chr " + "-device %s,tpmdev=dev ", +- src_tpm_addr->u.q_unix.path, ifmodel); ++ machine_options ? : "", src_tpm_addr->u.q_unix.path, ifmodel); + + *src_qemu = qtest_init(src_qemu_args); + + dst_qemu_args = g_strdup_printf( ++ "%s " + "-chardev socket,id=chr,path=%s " + "-tpmdev emulator,id=dev,chardev=chr " + "-device %s,tpmdev=dev " + "-incoming %s", ++ machine_options ? 
: "", + dst_tpm_addr->u.q_unix.path, + ifmodel, miguri); + +diff --git a/tests/tpm-util.h b/tests/tpm-util.h +index 5755698a..15e39249 100644 +--- a/tests/tpm-util.h ++++ b/tests/tpm-util.h +@@ -44,7 +44,8 @@ void tpm_util_migration_start_qemu(QTestState **src_qemu, + SocketAddress *src_tpm_addr, + SocketAddress *dst_tpm_addr, + const char *miguri, +- const char *ifmodel); ++ const char *ifmodel, ++ const char *machine_options); + + void tpm_util_wait_for_migration_complete(QTestState *who); + +-- +2.23.0 + diff --git a/test-tpm-tis-Add-Sysbus-TPM-TIS-device-test.patch b/test-tpm-tis-Add-Sysbus-TPM-TIS-device-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..fe33c8f4bd99eba304dc696d70a5126c559cd052 --- /dev/null +++ b/test-tpm-tis-Add-Sysbus-TPM-TIS-device-test.patch @@ -0,0 +1,226 @@ +From 2d28c0edddeaee5e4aa6e8c6b109776cddc1c4e4 Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Thu, 13 Aug 2020 21:37:23 +0800 +Subject: [PATCH 19/19] test: tpm-tis: Add Sysbus TPM-TIS device test + +The tests themselves are the same as the ISA device ones. +Only the main() changes as the tpm-tis-device device gets +instantiated. Also the base address of the device is not +0xFED40000 anymore but matches the base address of the +ARM virt platform bus. 
+ +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Message-id: 20200305165149.618-11-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + tests/Makefile.include | 5 ++ + tests/tpm-tis-device-swtpm-test.c | 76 +++++++++++++++++++++++++++ + tests/tpm-tis-device-test.c | 87 +++++++++++++++++++++++++++++++ + 3 files changed, 168 insertions(+) + create mode 100644 tests/tpm-tis-device-swtpm-test.c + create mode 100644 tests/tpm-tis-device-test.c + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index 950b32a2..d6de4e10 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -263,6 +263,8 @@ check-qtest-arm-y += tests/boot-serial-test$(EXESUF) + check-qtest-arm-y += tests/hexloader-test$(EXESUF) + check-qtest-arm-$(CONFIG_PFLASH_CFI02) += tests/pflash-cfi02-test$(EXESUF) + ++check-qtest-aarch64-$(CONFIG_TPM_TIS_SYSBUS) += tpm-tis-device-test ++check-qtest-aarch64-$(CONFIG_TPM_TIS_SYSBUS) += tpm-tis-device-swtpm-test + check-qtest-aarch64-y = tests/numa-test$(EXESUF) + check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF) + check-qtest-aarch64-y += tests/migration-test$(EXESUF) +@@ -667,7 +669,10 @@ tests/tpm-crb-swtpm-test$(EXESUF): tests/tpm-crb-swtpm-test.o tests/tpm-emu.o \ + tests/tpm-crb-test$(EXESUF): tests/tpm-crb-test.o tests/tpm-emu.o $(test-io-obj-y) + tests/tpm-tis-swtpm-test$(EXESUF): tests/tpm-tis-swtpm-test.o tests/tpm-emu.o \ + tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y) ++tests/tpm-tis-device-swtpm-test$(EXESUF): tests/tpm-tis-device-swtpm-test.o tests/tpm-emu.o \ ++ tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y) + tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-tis-util.o tests/tpm-emu.o $(test-io-obj-y) ++tests/tpm-tis-device-test$(EXESUF): tests/tpm-tis-device-test.o tests/tpm-tis-util.o tests/tpm-emu.o $(test-io-obj-y) + tests/test-io-channel-file$(EXESUF): tests/test-io-channel-file.o \ + tests/io-channel-helpers.o $(test-io-obj-y) + 
tests/test-io-channel-tls$(EXESUF): tests/test-io-channel-tls.o \ +diff --git a/tests/tpm-tis-device-swtpm-test.c b/tests/tpm-tis-device-swtpm-test.c +new file mode 100644 +index 00000000..7b200351 +--- /dev/null ++++ b/tests/tpm-tis-device-swtpm-test.c +@@ -0,0 +1,76 @@ ++/* ++ * QTest testcase for Sysbus TPM TIS talking to external swtpm and swtpm ++ * migration ++ * ++ * Copyright (c) 2018 IBM Corporation ++ * with parts borrowed from migration-test.c that is: ++ * Copyright (c) 2016-2018 Red Hat, Inc. and/or its affiliates ++ * ++ * Authors: ++ * Stefan Berger ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include ++ ++#include "libqtest.h" ++#include "qemu/module.h" ++#include "tpm-tests.h" ++#include "hw/acpi/tpm.h" ++ ++uint64_t tpm_tis_base_addr = 0xc000000; ++#define MACHINE_OPTIONS "-machine virt,gic-version=max -accel tcg" ++ ++typedef struct TestState { ++ char *src_tpm_path; ++ char *dst_tpm_path; ++ char *uri; ++} TestState; ++ ++static void tpm_tis_swtpm_test(const void *data) ++{ ++ const TestState *ts = data; ++ ++ tpm_test_swtpm_test(ts->src_tpm_path, tpm_util_tis_transfer, ++ "tpm-tis-device", MACHINE_OPTIONS); ++} ++ ++static void tpm_tis_swtpm_migration_test(const void *data) ++{ ++ const TestState *ts = data; ++ ++ tpm_test_swtpm_migration_test(ts->src_tpm_path, ts->dst_tpm_path, ts->uri, ++ tpm_util_tis_transfer, "tpm-tis-device", ++ MACHINE_OPTIONS); ++} ++ ++int main(int argc, char **argv) ++{ ++ int ret; ++ TestState ts = { 0 }; ++ ++ ts.src_tpm_path = g_dir_make_tmp("qemu-tpm-tis-device-swtpm-test.XXXXXX", ++ NULL); ++ ts.dst_tpm_path = g_dir_make_tmp("qemu-tpm-tis-device-swtpm-test.XXXXXX", ++ NULL); ++ ts.uri = g_strdup_printf("unix:%s/migsocket", ts.src_tpm_path); ++ ++ module_call_init(MODULE_INIT_QOM); ++ g_test_init(&argc, &argv, NULL); ++ ++ qtest_add_data_func("/tpm/tis-swtpm/test", &ts, 
tpm_tis_swtpm_test); ++ qtest_add_data_func("/tpm/tis-swtpm-migration/test", &ts, ++ tpm_tis_swtpm_migration_test); ++ ret = g_test_run(); ++ ++ g_rmdir(ts.dst_tpm_path); ++ g_free(ts.dst_tpm_path); ++ g_rmdir(ts.src_tpm_path); ++ g_free(ts.src_tpm_path); ++ g_free(ts.uri); ++ ++ return ret; ++} +diff --git a/tests/tpm-tis-device-test.c b/tests/tpm-tis-device-test.c +new file mode 100644 +index 00000000..63ed3644 +--- /dev/null ++++ b/tests/tpm-tis-device-test.c +@@ -0,0 +1,87 @@ ++/* ++ * QTest testcase for SYSBUS TPM TIS ++ * ++ * Copyright (c) 2018 Red Hat, Inc. ++ * Copyright (c) 2018 IBM Corporation ++ * ++ * Authors: ++ * Marc-André Lureau ++ * Stefan Berger ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include ++ ++#include "io/channel-socket.h" ++#include "libqtest-single.h" ++#include "qemu/module.h" ++#include "tpm-emu.h" ++#include "tpm-util.h" ++#include "tpm-tis-util.h" ++ ++/* ++ * As the Sysbus tpm-tis-device is instantiated on the ARM virt ++ * platform bus and it is the only sysbus device dynamically ++ * instantiated, it gets plugged at its base address ++ */ ++uint64_t tpm_tis_base_addr = 0xc000000; ++ ++int main(int argc, char **argv) ++{ ++ char *tmp_path = g_dir_make_tmp("qemu-tpm-tis-device-test.XXXXXX", NULL); ++ GThread *thread; ++ TestState test; ++ char *args; ++ int ret; ++ ++ module_call_init(MODULE_INIT_QOM); ++ g_test_init(&argc, &argv, NULL); ++ ++ test.addr = g_new0(SocketAddress, 1); ++ test.addr->type = SOCKET_ADDRESS_TYPE_UNIX; ++ test.addr->u.q_unix.path = g_build_filename(tmp_path, "sock", NULL); ++ g_mutex_init(&test.data_mutex); ++ g_cond_init(&test.data_cond); ++ test.data_cond_signal = false; ++ ++ thread = g_thread_new(NULL, tpm_emu_ctrl_thread, &test); ++ tpm_emu_test_wait_cond(&test); ++ ++ args = g_strdup_printf( ++ "-machine virt,gic-version=max -accel tcg " ++ "-chardev 
socket,id=chr,path=%s " ++ "-tpmdev emulator,id=dev,chardev=chr " ++ "-device tpm-tis-device,tpmdev=dev", ++ test.addr->u.q_unix.path); ++ qtest_start(args); ++ ++ qtest_add_data_func("/tpm-tis/test_check_localities", &test, ++ tpm_tis_test_check_localities); ++ ++ qtest_add_data_func("/tpm-tis/test_check_access_reg", &test, ++ tpm_tis_test_check_access_reg); ++ ++ qtest_add_data_func("/tpm-tis/test_check_access_reg_seize", &test, ++ tpm_tis_test_check_access_reg_seize); ++ ++ qtest_add_data_func("/tpm-tis/test_check_access_reg_release", &test, ++ tpm_tis_test_check_access_reg_release); ++ ++ qtest_add_data_func("/tpm-tis/test_check_transmit", &test, ++ tpm_tis_test_check_transmit); ++ ++ ret = g_test_run(); ++ ++ qtest_end(); ++ ++ g_thread_join(thread); ++ g_unlink(test.addr->u.q_unix.path); ++ qapi_free_SocketAddress(test.addr); ++ g_rmdir(tmp_path); ++ g_free(tmp_path); ++ g_free(args); ++ return ret; ++} +-- +2.23.0 + diff --git a/test-tpm-tis-Get-prepared-to-share-tests-between-ISA.patch b/test-tpm-tis-Get-prepared-to-share-tests-between-ISA.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c7be00a5f1d015d5da4fcf4791e6a175f0ff9c7 --- /dev/null +++ b/test-tpm-tis-Get-prepared-to-share-tests-between-ISA.patch @@ -0,0 +1,1044 @@ +From c8ed2a1fbe306ecbfb5c7d4156ae81c029829d95 Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Thu, 13 Aug 2020 20:56:54 +0800 +Subject: [PATCH 18/19] test: tpm-tis: Get prepared to share tests between ISA + and sysbus devices + +ISA and sysbus TPM-TIS devices will share their tests. Only +the main() will change (instantiation option is different). +Also the base address of the TPM-TIS device is going to be +different. on x86 it is located at 0xFED40000 while on ARM +it can be located at any location, discovered through the +device tree description. + +So we put shared test functions in a new object module. +Each test needs to set tpm_tis_base_addr global variable. 
+ +Also take benefit of this move to fix "block comments using +a leading */ on a separate line" checkpatch warnings. + +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Message-id: 20200305165149.618-10-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + tests/Makefile.include | 2 +- + tests/tpm-crb-swtpm-test.c | 4 + + tests/tpm-crb-test.c | 3 + + tests/tpm-tis-swtpm-test.c | 3 + + tests/tpm-tis-test.c | 414 +--------------------------------- + tests/tpm-tis-util.c | 451 +++++++++++++++++++++++++++++++++++++ + tests/tpm-tis-util.h | 23 ++ + tests/tpm-util.c | 3 - + tests/tpm-util.h | 5 + + 9 files changed, 493 insertions(+), 415 deletions(-) + create mode 100644 tests/tpm-tis-util.c + create mode 100644 tests/tpm-tis-util.h + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index c151de64..950b32a2 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -667,7 +667,7 @@ tests/tpm-crb-swtpm-test$(EXESUF): tests/tpm-crb-swtpm-test.o tests/tpm-emu.o \ + tests/tpm-crb-test$(EXESUF): tests/tpm-crb-test.o tests/tpm-emu.o $(test-io-obj-y) + tests/tpm-tis-swtpm-test$(EXESUF): tests/tpm-tis-swtpm-test.o tests/tpm-emu.o \ + tests/tpm-util.o tests/tpm-tests.o $(test-io-obj-y) +-tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-emu.o $(test-io-obj-y) ++tests/tpm-tis-test$(EXESUF): tests/tpm-tis-test.o tests/tpm-tis-util.o tests/tpm-emu.o $(test-io-obj-y) + tests/test-io-channel-file$(EXESUF): tests/test-io-channel-file.o \ + tests/io-channel-helpers.o $(test-io-obj-y) + tests/test-io-channel-tls$(EXESUF): tests/test-io-channel-tls.o \ +diff --git a/tests/tpm-crb-swtpm-test.c b/tests/tpm-crb-swtpm-test.c +index 5228cb7a..55fdb565 100644 +--- a/tests/tpm-crb-swtpm-test.c ++++ b/tests/tpm-crb-swtpm-test.c +@@ -18,6 +18,10 @@ + #include "libqtest.h" + #include "qemu/module.h" + #include "tpm-tests.h" ++#include "hw/acpi/tpm.h" ++ ++/* Not used but needed for linking */ ++uint64_t tpm_tis_base_addr = 
TPM_TIS_ADDR_BASE; + + typedef struct TestState { + char *src_tpm_path; +diff --git a/tests/tpm-crb-test.c b/tests/tpm-crb-test.c +index a139caa5..32695810 100644 +--- a/tests/tpm-crb-test.c ++++ b/tests/tpm-crb-test.c +@@ -19,6 +19,9 @@ + #include "qemu/module.h" + #include "tpm-emu.h" + ++/* Not used but needed for linking */ ++uint64_t tpm_tis_base_addr = TPM_TIS_ADDR_BASE; ++ + #define TPM_CMD "\x80\x01\x00\x00\x00\x0c\x00\x00\x01\x44\x00\x00" + + static void tpm_crb_test(const void *data) +diff --git a/tests/tpm-tis-swtpm-test.c b/tests/tpm-tis-swtpm-test.c +index 9470f157..90131cb3 100644 +--- a/tests/tpm-tis-swtpm-test.c ++++ b/tests/tpm-tis-swtpm-test.c +@@ -18,6 +18,9 @@ + #include "libqtest.h" + #include "qemu/module.h" + #include "tpm-tests.h" ++#include "hw/acpi/tpm.h" ++ ++uint64_t tpm_tis_base_addr = TPM_TIS_ADDR_BASE; + + typedef struct TestState { + char *src_tpm_path; +diff --git a/tests/tpm-tis-test.c b/tests/tpm-tis-test.c +index 92a7e95a..8042de13 100644 +--- a/tests/tpm-tis-test.c ++++ b/tests/tpm-tis-test.c +@@ -1,5 +1,5 @@ + /* +- * QTest testcase for TPM TIS ++ * QTest testcase for ISA TPM TIS + * + * Copyright (c) 2018 Red Hat, Inc. + * Copyright (c) 2018 IBM Corporation +@@ -20,417 +20,9 @@ + #include "libqtest.h" + #include "qemu/module.h" + #include "tpm-emu.h" ++#include "tpm-tis-util.h" + +-#define TIS_REG(LOCTY, REG) \ +- (TPM_TIS_ADDR_BASE + ((LOCTY) << 12) + REG) +- +-#define DEBUG_TIS_TEST 0 +- +-#define DPRINTF(fmt, ...) 
do { \ +- if (DEBUG_TIS_TEST) { \ +- printf(fmt, ## __VA_ARGS__); \ +- } \ +-} while (0) +- +-#define DPRINTF_ACCESS \ +- DPRINTF("%s: %d: locty=%d l=%d access=0x%02x pending_request_flag=0x%x\n", \ +- __func__, __LINE__, locty, l, access, pending_request_flag) +- +-#define DPRINTF_STS \ +- DPRINTF("%s: %d: sts = 0x%08x\n", __func__, __LINE__, sts) +- +-static const uint8_t TPM_CMD[12] = +- "\x80\x01\x00\x00\x00\x0c\x00\x00\x01\x44\x00\x00"; +- +-static void tpm_tis_test_check_localities(const void *data) +-{ +- uint8_t locty; +- uint8_t access; +- uint32_t ifaceid; +- uint32_t capability; +- uint32_t didvid; +- uint32_t rid; +- +- for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES; locty++) { +- access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- capability = readl(TIS_REG(locty, TPM_TIS_REG_INTF_CAPABILITY)); +- g_assert_cmpint(capability, ==, TPM_TIS_CAPABILITIES_SUPPORTED2_0); +- +- ifaceid = readl(TIS_REG(locty, TPM_TIS_REG_INTERFACE_ID)); +- g_assert_cmpint(ifaceid, ==, TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0); +- +- didvid = readl(TIS_REG(locty, TPM_TIS_REG_DID_VID)); +- g_assert_cmpint(didvid, !=, 0); +- g_assert_cmpint(didvid, !=, 0xffffffff); +- +- rid = readl(TIS_REG(locty, TPM_TIS_REG_RID)); +- g_assert_cmpint(rid, !=, 0); +- g_assert_cmpint(rid, !=, 0xffffffff); +- } +-} +- +-static void tpm_tis_test_check_access_reg(const void *data) +-{ +- uint8_t locty; +- uint8_t access; +- +- /* do not test locality 4 (hw only) */ +- for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES - 1; locty++) { +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* request use of locality */ +- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); +- +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, 
TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* release access */ +- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), +- TPM_TIS_ACCESS_ACTIVE_LOCALITY); +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- } +-} +- +-/* +- * Test case for seizing access by a higher number locality +- */ +-static void tpm_tis_test_check_access_reg_seize(const void *data) +-{ +- int locty, l; +- uint8_t access; +- uint8_t pending_request_flag; +- +- /* do not test locality 4 (hw only) */ +- for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES - 1; locty++) { +- pending_request_flag = 0; +- +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* request use of locality */ +- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* lower localities cannot seize access */ +- for (l = 0; l < locty; l++) { +- /* lower locality is not active */ +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* try to request use from 'l' */ +- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); +- +- /* requesting use from 'l' was not possible; +- we must see REQUEST_USE and possibly PENDING_REQUEST */ +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_REQUEST_USE | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* 
locality 'locty' must be unchanged; +- we must see PENDING_REQUEST */ +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- TPM_TIS_ACCESS_PENDING_REQUEST | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* try to seize from 'l' */ +- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_SEIZE); +- /* seize from 'l' was not possible */ +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_REQUEST_USE | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* locality 'locty' must be unchanged */ +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- TPM_TIS_ACCESS_PENDING_REQUEST | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* on the next loop we will have a PENDING_REQUEST flag +- set for locality 'l' */ +- pending_request_flag = TPM_TIS_ACCESS_PENDING_REQUEST; +- } +- +- /* higher localities can 'seize' access but not 'request use'; +- note: this will activate first l+1, then l+2 etc. 
*/ +- for (l = locty + 1; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { +- /* try to 'request use' from 'l' */ +- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); +- +- /* requesting use from 'l' was not possible; we should see +- REQUEST_USE and may see PENDING_REQUEST */ +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_REQUEST_USE | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* locality 'l-1' must be unchanged; we should always +- see PENDING_REQUEST from 'l' requesting access */ +- access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- TPM_TIS_ACCESS_PENDING_REQUEST | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* try to seize from 'l' */ +- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_SEIZE); +- +- /* seize from 'l' was possible */ +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* l - 1 should show that it has BEEN_SEIZED */ +- access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_BEEN_SEIZED | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* clear the BEEN_SEIZED flag and make sure it's gone */ +- writeb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS), +- TPM_TIS_ACCESS_BEEN_SEIZED); +- +- access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- } +- +- /* PENDING_REQUEST will not be set if locty = 0 since all localities +- were active; in case of locty = 1, 
locality 0 will be active +- but no PENDING_REQUEST anywhere */ +- if (locty <= 1) { +- pending_request_flag = 0; +- } +- +- /* release access from l - 1; this activates locty - 1 */ +- l--; +- +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- +- DPRINTF("%s: %d: relinquishing control on l = %d\n", +- __func__, __LINE__, l); +- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), +- TPM_TIS_ACCESS_ACTIVE_LOCALITY); +- +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- for (l = locty - 1; l >= 0; l--) { +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* release this locality */ +- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), +- TPM_TIS_ACCESS_ACTIVE_LOCALITY); +- +- if (l == 1) { +- pending_request_flag = 0; +- } +- } +- +- /* no locality may be active now */ +- for (l = 0; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- } +- } +-} +- +-/* +- * Test case for getting access when higher number locality relinquishes access +- */ +-static void tpm_tis_test_check_access_reg_release(const void *data) +-{ +- int locty, l; +- uint8_t access; +- uint8_t pending_request_flag; +- +- /* do not test locality 4 (hw only) */ +- for (locty = TPM_TIS_NUM_LOCALITIES - 2; locty >= 0; locty--) { +- pending_request_flag = 0; +- +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* request use of locality */ +- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), 
TPM_TIS_ACCESS_REQUEST_USE); +- access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- /* request use of all other localities */ +- for (l = 0; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { +- if (l == locty) { +- continue; +- } +- /* request use of locality 'l' -- we MUST see REQUEST USE and +- may see PENDING_REQUEST */ +- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_REQUEST_USE | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- pending_request_flag = TPM_TIS_ACCESS_PENDING_REQUEST; +- } +- /* release locality 'locty' */ +- writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), +- TPM_TIS_ACCESS_ACTIVE_LOCALITY); +- /* highest locality should now be active; release it and make sure the +- next higest locality is active afterwards */ +- for (l = TPM_TIS_NUM_LOCALITIES - 2; l >= 0; l--) { +- if (l == locty) { +- continue; +- } +- /* 'l' should be active now */ +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- /* 'l' relinquishes access */ +- writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), +- TPM_TIS_ACCESS_ACTIVE_LOCALITY); +- access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); +- DPRINTF_ACCESS; +- if (l == 1 || (locty <= 1 && l == 2)) { +- pending_request_flag = 0; +- } +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- pending_request_flag | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- } +- } +-} +- +-/* +- * Test case for transmitting packets +- */ +-static void tpm_tis_test_check_transmit(const void *data) +-{ +- const TestState *s = data; +- uint8_t access; +- 
uint32_t sts; +- uint16_t bcount; +- size_t i; +- +- /* request use of locality 0 */ +- writeb(TIS_REG(0, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); +- access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); +- g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | +- TPM_TIS_ACCESS_ACTIVE_LOCALITY | +- TPM_TIS_ACCESS_TPM_ESTABLISHMENT); +- +- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); +- DPRINTF_STS; +- +- g_assert_cmpint(sts & 0xff, ==, 0); +- g_assert_cmpint(sts & TPM_TIS_STS_TPM_FAMILY_MASK, ==, +- TPM_TIS_STS_TPM_FAMILY2_0); +- +- bcount = (sts >> 8) & 0xffff; +- g_assert_cmpint(bcount, >=, 128); +- +- writel(TIS_REG(0, TPM_TIS_REG_STS), TPM_TIS_STS_COMMAND_READY); +- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); +- DPRINTF_STS; +- g_assert_cmpint(sts & 0xff, ==, TPM_TIS_STS_COMMAND_READY); +- +- /* transmit command */ +- for (i = 0; i < sizeof(TPM_CMD); i++) { +- writeb(TIS_REG(0, TPM_TIS_REG_DATA_FIFO), TPM_CMD[i]); +- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); +- DPRINTF_STS; +- if (i < sizeof(TPM_CMD) - 1) { +- g_assert_cmpint(sts & 0xff, ==, +- TPM_TIS_STS_EXPECT | TPM_TIS_STS_VALID); +- } else { +- g_assert_cmpint(sts & 0xff, ==, TPM_TIS_STS_VALID); +- } +- g_assert_cmpint((sts >> 8) & 0xffff, ==, --bcount); +- } +- /* start processing */ +- writeb(TIS_REG(0, TPM_TIS_REG_STS), TPM_TIS_STS_TPM_GO); +- +- uint64_t end_time = g_get_monotonic_time() + 50 * G_TIME_SPAN_SECOND; +- do { +- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); +- if ((sts & TPM_TIS_STS_DATA_AVAILABLE) != 0) { +- break; +- } +- } while (g_get_monotonic_time() < end_time); +- +- sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); +- DPRINTF_STS; +- g_assert_cmpint(sts & 0xff, == , +- TPM_TIS_STS_VALID | TPM_TIS_STS_DATA_AVAILABLE); +- bcount = (sts >> 8) & 0xffff; +- +- /* read response */ +- uint8_t tpm_msg[sizeof(struct tpm_hdr)]; +- g_assert_cmpint(sizeof(tpm_msg), ==, bcount); +- +- for (i = 0; i < sizeof(tpm_msg); i++) { +- tpm_msg[i] = readb(TIS_REG(0, TPM_TIS_REG_DATA_FIFO)); +- sts = 
readl(TIS_REG(0, TPM_TIS_REG_STS)); +- DPRINTF_STS; +- if (sts & TPM_TIS_STS_DATA_AVAILABLE) { +- g_assert_cmpint((sts >> 8) & 0xffff, ==, --bcount); +- } +- } +- g_assert_cmpmem(tpm_msg, sizeof(tpm_msg), s->tpm_msg, sizeof(*s->tpm_msg)); +- +- /* relinquish use of locality 0 */ +- writeb(TIS_REG(0, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_ACTIVE_LOCALITY); +- access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); +-} ++uint64_t tpm_tis_base_addr = TPM_TIS_ADDR_BASE; + + int main(int argc, char **argv) + { +diff --git a/tests/tpm-tis-util.c b/tests/tpm-tis-util.c +new file mode 100644 +index 00000000..9aff503f +--- /dev/null ++++ b/tests/tpm-tis-util.c +@@ -0,0 +1,451 @@ ++/* ++ * QTest testcase for TPM TIS: common test functions used for both ++ * the ISA and SYSBUS devices ++ * ++ * Copyright (c) 2018 Red Hat, Inc. ++ * Copyright (c) 2018 IBM Corporation ++ * ++ * Authors: ++ * Marc-André Lureau ++ * Stefan Berger ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include ++ ++#include "hw/acpi/tpm.h" ++#include "io/channel-socket.h" ++#include "libqtest.h" ++#include "qemu/module.h" ++#include "tpm-emu.h" ++#include "tpm-util.h" ++#include "tpm-tis-util.h" ++ ++#define DEBUG_TIS_TEST 0 ++ ++#define DPRINTF(fmt, ...) 
do { \ ++ if (DEBUG_TIS_TEST) { \ ++ printf(fmt, ## __VA_ARGS__); \ ++ } \ ++} while (0) ++ ++#define DPRINTF_ACCESS \ ++ DPRINTF("%s: %d: locty=%d l=%d access=0x%02x pending_request_flag=0x%x\n", \ ++ __func__, __LINE__, locty, l, access, pending_request_flag) ++ ++#define DPRINTF_STS \ ++ DPRINTF("%s: %d: sts = 0x%08x\n", __func__, __LINE__, sts) ++ ++static const uint8_t TPM_CMD[12] = ++ "\x80\x01\x00\x00\x00\x0c\x00\x00\x01\x44\x00\x00"; ++ ++void tpm_tis_test_check_localities(const void *data) ++{ ++ uint8_t locty; ++ uint8_t access; ++ uint32_t ifaceid; ++ uint32_t capability; ++ uint32_t didvid; ++ uint32_t rid; ++ ++ for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES; locty++) { ++ access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ capability = readl(TIS_REG(locty, TPM_TIS_REG_INTF_CAPABILITY)); ++ g_assert_cmpint(capability, ==, TPM_TIS_CAPABILITIES_SUPPORTED2_0); ++ ++ ifaceid = readl(TIS_REG(locty, TPM_TIS_REG_INTERFACE_ID)); ++ g_assert_cmpint(ifaceid, ==, TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0); ++ ++ didvid = readl(TIS_REG(locty, TPM_TIS_REG_DID_VID)); ++ g_assert_cmpint(didvid, !=, 0); ++ g_assert_cmpint(didvid, !=, 0xffffffff); ++ ++ rid = readl(TIS_REG(locty, TPM_TIS_REG_RID)); ++ g_assert_cmpint(rid, !=, 0); ++ g_assert_cmpint(rid, !=, 0xffffffff); ++ } ++} ++ ++void tpm_tis_test_check_access_reg(const void *data) ++{ ++ uint8_t locty; ++ uint8_t access; ++ ++ /* do not test locality 4 (hw only) */ ++ for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES - 1; locty++) { ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* request use of locality */ ++ writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); ++ ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | 
++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* release access */ ++ writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY); ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ } ++} ++ ++/* ++ * Test case for seizing access by a higher number locality ++ */ ++void tpm_tis_test_check_access_reg_seize(const void *data) ++{ ++ int locty, l; ++ uint8_t access; ++ uint8_t pending_request_flag; ++ ++ /* do not test locality 4 (hw only) */ ++ for (locty = 0; locty < TPM_TIS_NUM_LOCALITIES - 1; locty++) { ++ pending_request_flag = 0; ++ ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* request use of locality */ ++ writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* lower localities cannot seize access */ ++ for (l = 0; l < locty; l++) { ++ /* lower locality is not active */ ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* try to request use from 'l' */ ++ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); ++ ++ /* ++ * requesting use from 'l' was not possible; ++ * we must see REQUEST_USE and possibly PENDING_REQUEST ++ */ ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_REQUEST_USE | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* ++ * locality 'locty' must be 
unchanged; ++ * we must see PENDING_REQUEST ++ */ ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ TPM_TIS_ACCESS_PENDING_REQUEST | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* try to seize from 'l' */ ++ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_SEIZE); ++ /* seize from 'l' was not possible */ ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_REQUEST_USE | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* locality 'locty' must be unchanged */ ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ TPM_TIS_ACCESS_PENDING_REQUEST | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* ++ * on the next loop we will have a PENDING_REQUEST flag ++ * set for locality 'l' ++ */ ++ pending_request_flag = TPM_TIS_ACCESS_PENDING_REQUEST; ++ } ++ ++ /* ++ * higher localities can 'seize' access but not 'request use'; ++ * note: this will activate first l+1, then l+2 etc. 
++ */ ++ for (l = locty + 1; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { ++ /* try to 'request use' from 'l' */ ++ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); ++ ++ /* ++ * requesting use from 'l' was not possible; we should see ++ * REQUEST_USE and may see PENDING_REQUEST ++ */ ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_REQUEST_USE | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* ++ * locality 'l-1' must be unchanged; we should always ++ * see PENDING_REQUEST from 'l' requesting access ++ */ ++ access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ TPM_TIS_ACCESS_PENDING_REQUEST | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* try to seize from 'l' */ ++ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_SEIZE); ++ ++ /* seize from 'l' was possible */ ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* l - 1 should show that it has BEEN_SEIZED */ ++ access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_BEEN_SEIZED | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* clear the BEEN_SEIZED flag and make sure it's gone */ ++ writeb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS), ++ TPM_TIS_ACCESS_BEEN_SEIZED); ++ ++ access = readb(TIS_REG(l - 1, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ } ++ ++ /* ++ * PENDING_REQUEST will not be set if locty = 0 since all localities ++ * were 
active; in case of locty = 1, locality 0 will be active ++ * but no PENDING_REQUEST anywhere ++ */ ++ if (locty <= 1) { ++ pending_request_flag = 0; ++ } ++ ++ /* release access from l - 1; this activates locty - 1 */ ++ l--; ++ ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ ++ DPRINTF("%s: %d: relinquishing control on l = %d\n", ++ __func__, __LINE__, l); ++ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY); ++ ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ for (l = locty - 1; l >= 0; l--) { ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* release this locality */ ++ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY); ++ ++ if (l == 1) { ++ pending_request_flag = 0; ++ } ++ } ++ ++ /* no locality may be active now */ ++ for (l = 0; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ } ++ } ++} ++ ++/* ++ * Test case for getting access when higher number locality relinquishes access ++ */ ++void tpm_tis_test_check_access_reg_release(const void *data) ++{ ++ int locty, l; ++ uint8_t access; ++ uint8_t pending_request_flag; ++ ++ /* do not test locality 4 (hw only) */ ++ for (locty = TPM_TIS_NUM_LOCALITIES - 2; locty >= 0; locty--) { ++ pending_request_flag = 0; ++ ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* request use of locality */ ++ writeb(TIS_REG(locty, 
TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); ++ access = readb(TIS_REG(locty, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ /* request use of all other localities */ ++ for (l = 0; l < TPM_TIS_NUM_LOCALITIES - 1; l++) { ++ if (l == locty) { ++ continue; ++ } ++ /* ++ * request use of locality 'l' -- we MUST see REQUEST USE and ++ * may see PENDING_REQUEST ++ */ ++ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_REQUEST_USE | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ pending_request_flag = TPM_TIS_ACCESS_PENDING_REQUEST; ++ } ++ /* release locality 'locty' */ ++ writeb(TIS_REG(locty, TPM_TIS_REG_ACCESS), ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY); ++ /* ++ * highest locality should now be active; release it and make sure the ++ * next higest locality is active afterwards ++ */ ++ for (l = TPM_TIS_NUM_LOCALITIES - 2; l >= 0; l--) { ++ if (l == locty) { ++ continue; ++ } ++ /* 'l' should be active now */ ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ /* 'l' relinquishes access */ ++ writeb(TIS_REG(l, TPM_TIS_REG_ACCESS), ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY); ++ access = readb(TIS_REG(l, TPM_TIS_REG_ACCESS)); ++ DPRINTF_ACCESS; ++ if (l == 1 || (locty <= 1 && l == 2)) { ++ pending_request_flag = 0; ++ } ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ pending_request_flag | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ } ++ } ++} ++ ++/* ++ * Test case for transmitting packets ++ */ ++void tpm_tis_test_check_transmit(const void *data) ++{ ++ const 
TestState *s = data; ++ uint8_t access; ++ uint32_t sts; ++ uint16_t bcount; ++ size_t i; ++ ++ /* request use of locality 0 */ ++ writeb(TIS_REG(0, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_REQUEST_USE); ++ access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); ++ g_assert_cmpint(access, ==, TPM_TIS_ACCESS_TPM_REG_VALID_STS | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY | ++ TPM_TIS_ACCESS_TPM_ESTABLISHMENT); ++ ++ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); ++ DPRINTF_STS; ++ ++ g_assert_cmpint(sts & 0xff, ==, 0); ++ g_assert_cmpint(sts & TPM_TIS_STS_TPM_FAMILY_MASK, ==, ++ TPM_TIS_STS_TPM_FAMILY2_0); ++ ++ bcount = (sts >> 8) & 0xffff; ++ g_assert_cmpint(bcount, >=, 128); ++ ++ writel(TIS_REG(0, TPM_TIS_REG_STS), TPM_TIS_STS_COMMAND_READY); ++ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); ++ DPRINTF_STS; ++ g_assert_cmpint(sts & 0xff, ==, TPM_TIS_STS_COMMAND_READY); ++ ++ /* transmit command */ ++ for (i = 0; i < sizeof(TPM_CMD); i++) { ++ writeb(TIS_REG(0, TPM_TIS_REG_DATA_FIFO), TPM_CMD[i]); ++ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); ++ DPRINTF_STS; ++ if (i < sizeof(TPM_CMD) - 1) { ++ g_assert_cmpint(sts & 0xff, ==, ++ TPM_TIS_STS_EXPECT | TPM_TIS_STS_VALID); ++ } else { ++ g_assert_cmpint(sts & 0xff, ==, TPM_TIS_STS_VALID); ++ } ++ g_assert_cmpint((sts >> 8) & 0xffff, ==, --bcount); ++ } ++ /* start processing */ ++ writeb(TIS_REG(0, TPM_TIS_REG_STS), TPM_TIS_STS_TPM_GO); ++ ++ uint64_t end_time = g_get_monotonic_time() + 50 * G_TIME_SPAN_SECOND; ++ do { ++ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); ++ if ((sts & TPM_TIS_STS_DATA_AVAILABLE) != 0) { ++ break; ++ } ++ } while (g_get_monotonic_time() < end_time); ++ ++ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); ++ DPRINTF_STS; ++ g_assert_cmpint(sts & 0xff, == , ++ TPM_TIS_STS_VALID | TPM_TIS_STS_DATA_AVAILABLE); ++ bcount = (sts >> 8) & 0xffff; ++ ++ /* read response */ ++ uint8_t tpm_msg[sizeof(struct tpm_hdr)]; ++ g_assert_cmpint(sizeof(tpm_msg), ==, bcount); ++ ++ for (i = 0; i < sizeof(tpm_msg); i++) { ++ tpm_msg[i] = readb(TIS_REG(0, 
TPM_TIS_REG_DATA_FIFO)); ++ sts = readl(TIS_REG(0, TPM_TIS_REG_STS)); ++ DPRINTF_STS; ++ if (sts & TPM_TIS_STS_DATA_AVAILABLE) { ++ g_assert_cmpint((sts >> 8) & 0xffff, ==, --bcount); ++ } ++ } ++ g_assert_cmpmem(tpm_msg, sizeof(tpm_msg), s->tpm_msg, sizeof(*s->tpm_msg)); ++ ++ /* relinquish use of locality 0 */ ++ writeb(TIS_REG(0, TPM_TIS_REG_ACCESS), TPM_TIS_ACCESS_ACTIVE_LOCALITY); ++ access = readb(TIS_REG(0, TPM_TIS_REG_ACCESS)); ++} +diff --git a/tests/tpm-tis-util.h b/tests/tpm-tis-util.h +new file mode 100644 +index 00000000..d10efe86 +--- /dev/null ++++ b/tests/tpm-tis-util.h +@@ -0,0 +1,23 @@ ++/* ++ * QTest TPM TIS: Common test functions used for both the ++ * ISA and SYSBUS devices ++ * ++ * Copyright (c) 2018 IBM Corporation ++ * ++ * Authors: ++ * Stefan Berger ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#ifndef TESTS_TPM_TIS_UTIL_H ++#define TESTS_TPM_TIS_UTIL_H ++ ++void tpm_tis_test_check_localities(const void *data); ++void tpm_tis_test_check_access_reg(const void *data); ++void tpm_tis_test_check_access_reg_seize(const void *data); ++void tpm_tis_test_check_access_reg_release(const void *data); ++void tpm_tis_test_check_transmit(const void *data); ++ ++#endif /* TESTS_TPM_TIS_UTIL_H */ +diff --git a/tests/tpm-util.c b/tests/tpm-util.c +index 7ecdae2f..34efae8f 100644 +--- a/tests/tpm-util.c ++++ b/tests/tpm-util.c +@@ -19,9 +19,6 @@ + #include "tpm-util.h" + #include "qapi/qmp/qdict.h" + +-#define TIS_REG(LOCTY, REG) \ +- (TPM_TIS_ADDR_BASE + ((LOCTY) << 12) + REG) +- + void tpm_util_crb_transfer(QTestState *s, + const unsigned char *req, size_t req_size, + unsigned char *rsp, size_t rsp_size) +diff --git a/tests/tpm-util.h b/tests/tpm-util.h +index 15e39249..3b97d690 100644 +--- a/tests/tpm-util.h ++++ b/tests/tpm-util.h +@@ -15,6 +15,11 @@ + + #include "io/channel-socket.h" + ++extern uint64_t tpm_tis_base_addr; ++ ++#define TIS_REG(LOCTY, 
REG) \ ++ (tpm_tis_base_addr + ((LOCTY) << 12) + REG) ++ + typedef void (tx_func)(QTestState *s, + const unsigned char *req, size_t req_size, + unsigned char *rsp, size_t rsp_size); +-- +2.23.0 + diff --git a/tests-Add-bios-tests-to-arm-virt.patch b/tests-Add-bios-tests-to-arm-virt.patch new file mode 100644 index 0000000000000000000000000000000000000000..025afb506017f9bc1c6fdb26df35c9534a8f3672 --- /dev/null +++ b/tests-Add-bios-tests-to-arm-virt.patch @@ -0,0 +1,86 @@ +From abbcc35ccb22d81d69a28dc66b5f5d94e673a25e Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 18 Sep 2019 14:06:33 +0100 +Subject: [PATCH] tests: Add bios tests to arm/virt + +This adds numamem and memhp tests for arm/virt platform. + +Signed-off-by: Shameer Kolothum +Reviewed-by: Igor Mammedov +Message-Id: <20190918130633.4872-12-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + tests/bios-tables-test.c | 49 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 49 insertions(+) + +diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c +index 53a91a8067..5e177b7155 100644 +--- a/tests/bios-tables-test.c ++++ b/tests/bios-tables-test.c +@@ -874,6 +874,53 @@ static void test_acpi_piix4_tcg_dimm_pxm(void) + test_acpi_tcg_dimm_pxm(MACHINE_PC); + } + ++static void test_acpi_virt_tcg_memhp(void) ++{ ++ test_data data = { ++ .machine = "virt", ++ .accel = "tcg", ++ .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", ++ .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", ++ .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", ++ .ram_start = 0x40000000ULL, ++ .scan_len = 256ULL * 1024 * 1024, ++ }; ++ ++ data.variant = ".memhp"; ++ test_acpi_one(" -cpu cortex-a57" ++ " -m 256M,slots=3,maxmem=1G" ++ " -object memory-backend-ram,id=ram0,size=128M" ++ " -object memory-backend-ram,id=ram1,size=128M" ++ " -numa node,memdev=ram0 -numa node,memdev=ram1" ++ " -numa dist,src=0,dst=1,val=21", ++ &data); 
++ ++ free_test_data(&data); ++ ++} ++ ++static void test_acpi_virt_tcg_numamem(void) ++{ ++ test_data data = { ++ .machine = "virt", ++ .accel = "tcg", ++ .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", ++ .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", ++ .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", ++ .ram_start = 0x40000000ULL, ++ .scan_len = 128ULL * 1024 * 1024, ++ }; ++ ++ data.variant = ".numamem"; ++ test_acpi_one(" -cpu cortex-a57" ++ " -object memory-backend-ram,id=ram0,size=128M" ++ " -numa node,memdev=ram0", ++ &data); ++ ++ free_test_data(&data); ++ ++} ++ + static void test_acpi_virt_tcg(void) + { + test_data data = { +@@ -920,6 +967,8 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/q35/dimmpxm", test_acpi_q35_tcg_dimm_pxm); + } else if (strcmp(arch, "aarch64") == 0) { + qtest_add_func("acpi/virt", test_acpi_virt_tcg); ++ qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); ++ qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); + } + ret = g_test_run(); + boot_sector_cleanup(disk); +-- +2.19.1 diff --git a/tests-Disalbe-filemonitor-testcase.patch b/tests-Disalbe-filemonitor-testcase.patch new file mode 100644 index 0000000000000000000000000000000000000000..b389299e35dd49154f6e660ee3d66237b15ec58b --- /dev/null +++ b/tests-Disalbe-filemonitor-testcase.patch @@ -0,0 +1,34 @@ +From 4f1eaa63065594276c11958e963377a09668d44b Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Thu, 6 Aug 2020 10:05:00 +0800 +Subject: [PATCH] tests: Disalbe filemonitor testcase + +Since filemonitor testcase requires that host kernel being a LTS version, +we cannot guarantee that on OBS system. Let's disable it by default. 
+ +Signed-off-by: Ying Fang + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index d8cf00c1..f3273ad3 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -117,7 +117,6 @@ ifneq (,$(findstring qemu-ga,$(TOOLS))) + check-unit-$(call land,$(CONFIG_LINUX),$(CONFIG_VIRTIO_SERIAL)) += tests/test-qga$(EXESUF) + endif + check-unit-y += tests/test-timed-average$(EXESUF) +-check-unit-$(CONFIG_INOTIFY1) += tests/test-util-filemonitor$(EXESUF) + check-unit-y += tests/test-util-sockets$(EXESUF) + check-unit-$(CONFIG_BLOCK) += tests/test-authz-simple$(EXESUF) + check-unit-$(CONFIG_BLOCK) += tests/test-authz-list$(EXESUF) +@@ -654,8 +653,6 @@ tests/test-crypto-tlssession$(EXESUF): tests/test-crypto-tlssession.o \ + tests/crypto-tls-x509-helpers.o tests/pkix_asn1_tab.o \ + tests/crypto-tls-psk-helpers.o \ + $(test-crypto-obj-y) +-tests/test-util-filemonitor$(EXESUF): tests/test-util-filemonitor.o \ +- $(test-util-obj-y) + tests/test-util-sockets$(EXESUF): tests/test-util-sockets.o \ + tests/socket-helpers.o $(test-util-obj-y) + tests/test-authz-simple$(EXESUF): tests/test-authz-simple.o $(test-authz-obj-y) +-- +2.23.0 + diff --git a/tests-Update-ACPI-tables-list-for-upcoming-arm-virt-.patch b/tests-Update-ACPI-tables-list-for-upcoming-arm-virt-.patch new file mode 100644 index 0000000000000000000000000000000000000000..e739883feb04d14f33a97a2a0b6690ac6c5ccc24 --- /dev/null +++ b/tests-Update-ACPI-tables-list-for-upcoming-arm-virt-.patch @@ -0,0 +1,44 @@ +From 27e2533e43f0ab2b8a60f1902f58f8752581ea9f Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Wed, 18 Sep 2019 14:06:32 +0100 +Subject: [PATCH] tests: Update ACPI tables list for upcoming arm/virt tests + +This is in preparation to add numamem and memhp tests to +arm/virt platform. The bios-tables-test-allowed-diff.h +is updated with a list of expected ACPI tables that needs to be +present in tests/data/acpi/virt folder.
+ +Signed-off-by: Shameer Kolothum +Message-Id: <20190918130633.4872-11-shameerali.kolothum.thodi@huawei.com> +Acked-by: Peter Maydell +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Igor Mammedov +--- + tests/bios-tables-test-allowed-diff.h | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/tests/bios-tables-test-allowed-diff.h b/tests/bios-tables-test-allowed-diff.h +index 32a401ae35..3776dd2f3d 100644 +--- a/tests/bios-tables-test-allowed-diff.h ++++ b/tests/bios-tables-test-allowed-diff.h +@@ -1,4 +1,17 @@ + /* List of comma-separated changed AML files to ignore */ + "tests/data/acpi/virt/DSDT", ++"tests/data/acpi/virt/APIC.memhp", ++"tests/data/acpi/virt/APIC.numamem", + "tests/data/acpi/virt/DSDT.memhp", + "tests/data/acpi/virt/DSDT.numamem", ++"tests/data/acpi/virt/FACP.memhp", ++"tests/data/acpi/virt/FACP.numamem", ++"tests/data/acpi/virt/GTDT.memhp", ++"tests/data/acpi/virt/GTDT.numamem", ++"tests/data/acpi/virt/MCFG.memhp", ++"tests/data/acpi/virt/MCFG.numamem", ++"tests/data/acpi/virt/SLIT.memhp", ++"tests/data/acpi/virt/SPCR.memhp", ++"tests/data/acpi/virt/SPCR.numamem", ++"tests/data/acpi/virt/SRAT.memhp", ++"tests/data/acpi/virt/SRAT.numamem", +-- +2.19.1 diff --git a/tests-acpi-add-empty-files.patch b/tests-acpi-add-empty-files.patch new file mode 100644 index 0000000000000000000000000000000000000000..46e51c0de0e0ba84b6edf01fb62e3005acd37697 --- /dev/null +++ b/tests-acpi-add-empty-files.patch @@ -0,0 +1,88 @@ +From c943416df54931cea8b19183fd7c4f2dbd86ec72 Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Sun, 29 Sep 2019 10:54:12 -0400 +Subject: [PATCH] tests/acpi: add empty files + +Needed to make tests pass. Will replace with actual files. + +Signed-off-by: Michael S. 
Tsirkin +--- + tests/data/acpi/virt/APIC.memhp | 0 + tests/data/acpi/virt/APIC.numamem | 0 + tests/data/acpi/virt/DSDT.memhp | 0 + tests/data/acpi/virt/DSDT.numamem | 0 + tests/data/acpi/virt/FACP.memhp | 0 + tests/data/acpi/virt/FACP.numamem | 0 + tests/data/acpi/virt/GTDT.memhp | 0 + tests/data/acpi/virt/GTDT.numamem | 0 + tests/data/acpi/virt/MCFG.memhp | 0 + tests/data/acpi/virt/MCFG.numamem | 0 + tests/data/acpi/virt/SLIT.memhp | 0 + tests/data/acpi/virt/SPCR.memhp | 0 + tests/data/acpi/virt/SPCR.numamem | 0 + tests/data/acpi/virt/SRAT.memhp | 0 + tests/data/acpi/virt/SRAT.numamem | 0 + 15 files changed, 0 insertions(+), 0 deletions(-) + create mode 100644 tests/data/acpi/virt/APIC.memhp + create mode 100644 tests/data/acpi/virt/APIC.numamem + create mode 100644 tests/data/acpi/virt/DSDT.memhp + create mode 100644 tests/data/acpi/virt/DSDT.numamem + create mode 100644 tests/data/acpi/virt/FACP.memhp + create mode 100644 tests/data/acpi/virt/FACP.numamem + create mode 100644 tests/data/acpi/virt/GTDT.memhp + create mode 100644 tests/data/acpi/virt/GTDT.numamem + create mode 100644 tests/data/acpi/virt/MCFG.memhp + create mode 100644 tests/data/acpi/virt/MCFG.numamem + create mode 100644 tests/data/acpi/virt/SLIT.memhp + create mode 100644 tests/data/acpi/virt/SPCR.memhp + create mode 100644 tests/data/acpi/virt/SPCR.numamem + create mode 100644 tests/data/acpi/virt/SRAT.memhp + create mode 100644 tests/data/acpi/virt/SRAT.numamem + +diff --git a/tests/data/acpi/virt/APIC.memhp b/tests/data/acpi/virt/APIC.memhp +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/APIC.numamem b/tests/data/acpi/virt/APIC.numamem +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/DSDT.memhp b/tests/data/acpi/virt/DSDT.memhp +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/DSDT.numamem b/tests/data/acpi/virt/DSDT.numamem +new file mode 100644 +index 0000000000..e69de29bb2 
+diff --git a/tests/data/acpi/virt/FACP.memhp b/tests/data/acpi/virt/FACP.memhp +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/FACP.numamem b/tests/data/acpi/virt/FACP.numamem +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/GTDT.memhp b/tests/data/acpi/virt/GTDT.memhp +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/GTDT.numamem b/tests/data/acpi/virt/GTDT.numamem +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/MCFG.memhp b/tests/data/acpi/virt/MCFG.memhp +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/MCFG.numamem b/tests/data/acpi/virt/MCFG.numamem +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/SLIT.memhp b/tests/data/acpi/virt/SLIT.memhp +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/SPCR.memhp b/tests/data/acpi/virt/SPCR.memhp +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/SPCR.numamem b/tests/data/acpi/virt/SPCR.numamem +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/SRAT.memhp b/tests/data/acpi/virt/SRAT.memhp +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tests/data/acpi/virt/SRAT.numamem b/tests/data/acpi/virt/SRAT.numamem +new file mode 100644 +index 0000000000..e69de29bb2 +-- +2.19.1 diff --git a/tests-allow-empty-expected-files.patch b/tests-allow-empty-expected-files.patch new file mode 100644 index 0000000000000000000000000000000000000000..615fb2121d3ffd93d926fe4f3e9623ffce16eb88 --- /dev/null +++ b/tests-allow-empty-expected-files.patch @@ -0,0 +1,31 @@ +From 2ab0636e0c8fcb8b5b1b222f0d5ae7f4dfc663c5 Mon Sep 17 00:00:00 2001 +From: "Michael S. 
Tsirkin" +Date: Sat, 5 Oct 2019 17:09:17 -0400 +Subject: [PATCH] tests: allow empty expected files + +An empty expected file is a handy way to seed the files +without creating merge conflicts. + +Signed-off-by: Michael S. Tsirkin +--- + tests/bios-tables-test.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c +index a356ac3489..53a91a8067 100644 +--- a/tests/bios-tables-test.c ++++ b/tests/bios-tables-test.c +@@ -334,7 +334,10 @@ try_again: + g_assert(ret); + g_assert_no_error(error); + g_assert(exp_sdt.aml); +- g_assert(exp_sdt.aml_len); ++ if (!exp_sdt.aml_len) { ++ fprintf(stderr, "Warning! zero length expected file '%s'\n", ++ aml_file); ++ } + + g_array_append_val(exp_tables, exp_sdt); + } +-- +2.19.1 diff --git a/tests-allow-filtering-crypto-cipher-benchmark-tests.patch b/tests-allow-filtering-crypto-cipher-benchmark-tests.patch new file mode 100644 index 0000000000000000000000000000000000000000..51f6b70461b0ade8ff80b2f8ac0302546593228f --- /dev/null +++ b/tests-allow-filtering-crypto-cipher-benchmark-tests.patch @@ -0,0 +1,56 @@ +From c2a6b4b3204aef2efc39f1b59bc110b54ca24587 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Tue, 15 Oct 2019 11:19:29 +0100 +Subject: [PATCH] tests: allow filtering crypto cipher benchmark tests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add support for specifying a cipher mode and chunk size as argv to +filter which combinations are benchmarked. For example to only +benchmark XTS mode with 512 byte chunks: + + ./tests/benchmark-crypto-cipher xts 512 + +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefano Garzarella +Signed-off-by: Daniel P. 
Berrangé +--- + tests/benchmark-crypto-cipher.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/tests/benchmark-crypto-cipher.c b/tests/benchmark-crypto-cipher.c +index cb6b7200a5..53032334ec 100644 +--- a/tests/benchmark-crypto-cipher.c ++++ b/tests/benchmark-crypto-cipher.c +@@ -163,15 +163,26 @@ static void test_cipher_speed_xts_aes_256(const void *opaque) + + int main(int argc, char **argv) + { ++ char *alg = NULL; ++ char *size = NULL; + g_test_init(&argc, &argv, NULL); + g_assert(qcrypto_init(NULL) == 0); + + #define ADD_TEST(mode, cipher, keysize, chunk) \ +- g_test_add_data_func( \ ++ if ((!alg || g_str_equal(alg, #mode)) && \ ++ (!size || g_str_equal(size, #chunk))) \ ++ g_test_add_data_func( \ + "/crypto/cipher/" #mode "-" #cipher "-" #keysize "/chunk-" #chunk, \ + (void *)chunk, \ + test_cipher_speed_ ## mode ## _ ## cipher ## _ ## keysize) + ++ if (argc >= 2) { ++ alg = argv[1]; ++ } ++ if (argc >= 3) { ++ size = argv[2]; ++ } ++ + #define ADD_TESTS(chunk) \ + do { \ + ADD_TEST(ecb, aes, 128, chunk); \ +-- +2.27.0 + diff --git a/tests-benchmark-crypto-with-fixed-data-size-not-time.patch b/tests-benchmark-crypto-with-fixed-data-size-not-time.patch new file mode 100644 index 0000000000000000000000000000000000000000..8841294a8a43877948bd2c74228794aafdaf0114 --- /dev/null +++ b/tests-benchmark-crypto-with-fixed-data-size-not-time.patch @@ -0,0 +1,150 @@ +From c151519a7f5c08dde9a32534bc485588a5793967 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Thu, 17 Oct 2019 14:22:19 +0100 +Subject: [PATCH] tests: benchmark crypto with fixed data size, not time period +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently the crypto benchmarks are processing data in varying chunk +sizes, over a fixed time period. 
This turns out to be a terrible idea +because with small chunk sizes the overhead of checking the elapsed +time on each loop iteration masks the true performance. + +Benchmarking over a fixed data size avoids the loop running any system +calls which can interfere with the performance measurements. + +Before this change + +Enc chunk 512 bytes 2283.47 MB/sec Dec chunk 512 bytes 2236.23 MB/sec OK +Enc chunk 4096 bytes 2744.97 MB/sec Dec chunk 4096 bytes 2614.71 MB/sec OK +Enc chunk 16384 bytes 2777.53 MB/sec Dec chunk 16384 bytes 2678.44 MB/sec OK +Enc chunk 65536 bytes 2809.34 MB/sec Dec chunk 65536 bytes 2699.47 MB/sec OK + +After this change + +Enc chunk 512 bytes 2058.22 MB/sec Dec chunk 512 bytes 2030.11 MB/sec OK +Enc chunk 4096 bytes 2699.27 MB/sec Dec chunk 4096 bytes 2573.78 MB/sec OK +Enc chunk 16384 bytes 2748.52 MB/sec Dec chunk 16384 bytes 2653.76 MB/sec OK +Enc chunk 65536 bytes 2814.08 MB/sec Dec chunk 65536 bytes 2712.74 MB/sec OK + +The actual crypto performance hasn't changed, which shows how +significant the mis-measurement has been for small data sizes. + +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Stefano Garzarella +Signed-off-by: Daniel P. 
Berrangé +--- + tests/benchmark-crypto-cipher.c | 26 ++++++++++++++------------ + tests/benchmark-crypto-hash.c | 17 +++++++++-------- + 2 files changed, 23 insertions(+), 20 deletions(-) + +diff --git a/tests/benchmark-crypto-cipher.c b/tests/benchmark-crypto-cipher.c +index 67fdf8c31d..cb6b7200a5 100644 +--- a/tests/benchmark-crypto-cipher.c ++++ b/tests/benchmark-crypto-cipher.c +@@ -21,11 +21,12 @@ static void test_cipher_speed(size_t chunk_size, + { + QCryptoCipher *cipher; + Error *err = NULL; +- double total = 0.0; + uint8_t *key = NULL, *iv = NULL; + uint8_t *plaintext = NULL, *ciphertext = NULL; + size_t nkey; + size_t niv; ++ const size_t total = 2 * GiB; ++ size_t remain; + + if (!qcrypto_cipher_supports(alg, mode)) { + return; +@@ -58,33 +59,34 @@ static void test_cipher_speed(size_t chunk_size, + &err) == 0); + + g_test_timer_start(); +- do { ++ remain = total; ++ while (remain) { + g_assert(qcrypto_cipher_encrypt(cipher, + plaintext, + ciphertext, + chunk_size, + &err) == 0); +- total += chunk_size; +- } while (g_test_timer_elapsed() < 1.0); ++ remain -= chunk_size; ++ } ++ g_test_timer_elapsed(); + +- total /= MiB; + g_print("Enc chunk %zu bytes ", chunk_size); +- g_print("%.2f MB/sec ", total / g_test_timer_last()); ++ g_print("%.2f MB/sec ", (double)total / MiB / g_test_timer_last()); + +- total = 0.0; + g_test_timer_start(); +- do { ++ remain = total; ++ while (remain) { + g_assert(qcrypto_cipher_decrypt(cipher, + plaintext, + ciphertext, + chunk_size, + &err) == 0); +- total += chunk_size; +- } while (g_test_timer_elapsed() < 1.0); ++ remain -= chunk_size; ++ } ++ g_test_timer_elapsed(); + +- total /= MiB; + g_print("Dec chunk %zu bytes ", chunk_size); +- g_print("%.2f MB/sec ", total / g_test_timer_last()); ++ g_print("%.2f MB/sec ", (double)total / MiB / g_test_timer_last()); + + qcrypto_cipher_free(cipher); + g_free(plaintext); +diff --git a/tests/benchmark-crypto-hash.c b/tests/benchmark-crypto-hash.c +index 9b6f7a9155..7f659f7323 100644 +--- 
a/tests/benchmark-crypto-hash.c ++++ b/tests/benchmark-crypto-hash.c +@@ -20,7 +20,8 @@ static void test_hash_speed(const void *opaque) + size_t chunk_size = (size_t)opaque; + uint8_t *in = NULL, *out = NULL; + size_t out_len = 0; +- double total = 0.0; ++ const size_t total = 2 * GiB; ++ size_t remain; + struct iovec iov; + int ret; + +@@ -31,20 +32,20 @@ static void test_hash_speed(const void *opaque) + iov.iov_len = chunk_size; + + g_test_timer_start(); +- do { ++ remain = total; ++ while (remain) { + ret = qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_SHA256, + &iov, 1, &out, &out_len, + NULL); + g_assert(ret == 0); + +- total += chunk_size; +- } while (g_test_timer_elapsed() < 5.0); ++ remain -= chunk_size; ++ } ++ g_test_timer_elapsed(); + +- total /= MiB; + g_print("sha256: "); +- g_print("Testing chunk_size %zu bytes ", chunk_size); +- g_print("done: %.2f MB in %.2f secs: ", total, g_test_timer_last()); +- g_print("%.2f MB/sec\n", total / g_test_timer_last()); ++ g_print("Hash %zu GB chunk size %zu bytes ", total / GiB, chunk_size); ++ g_print("%.2f MB/sec ", (double)total / MiB / g_test_timer_last()); + + g_free(out); + g_free(in); +-- +2.27.0 + diff --git a/tests-bios-tables-test-disable-this-testcase.patch b/tests-bios-tables-test-disable-this-testcase.patch new file mode 100644 index 0000000000000000000000000000000000000000..993fee935546735c16bfe9a30ff856ac135f4d53 --- /dev/null +++ b/tests-bios-tables-test-disable-this-testcase.patch @@ -0,0 +1,48 @@ +From 0814ef80cdf212c68b73fc1fbad4eeece3560ef9 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 15 Apr 2020 19:52:09 +0800 +Subject: [PATCH] tests/bios-tables-test: disable this testcase + +We will change ARM virt ACPI FACP and PPTT table in order to +support CPU topology information presentation. However our +change make this testcase fail since we changed the table +totally and we cannot apply patch with rpmbuild system. 
+ +Signed-off-by: Ying Fang +--- + tests/Makefile.include | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/tests/Makefile.include b/tests/Makefile.include +index fd7fdb86..d8cf00c1 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -164,7 +164,7 @@ check-qtest-i386-y += tests/ide-test$(EXESUF) + check-qtest-i386-y += tests/ahci-test$(EXESUF) + check-qtest-i386-y += tests/hd-geo-test$(EXESUF) + check-qtest-i386-y += tests/boot-order-test$(EXESUF) +-check-qtest-i386-y += tests/bios-tables-test$(EXESUF) ++# check-qtest-i386-y += tests/bios-tables-test$(EXESUF) + check-qtest-i386-$(CONFIG_SGA) += tests/boot-serial-test$(EXESUF) + check-qtest-i386-$(CONFIG_SLIRP) += tests/pxe-test$(EXESUF) + check-qtest-i386-y += tests/rtc-test$(EXESUF) +@@ -269,7 +269,7 @@ check-qtest-aarch64-y += tests/boot-serial-test$(EXESUF) + check-qtest-aarch64-y += tests/migration-test$(EXESUF) + # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make test unconditional + ifneq ($(ARCH),arm) +-check-qtest-aarch64-y += tests/bios-tables-test$(EXESUF) ++#check-qtest-aarch64-y += tests/bios-tables-test$(EXESUF) + endif + + check-qtest-microblazeel-y += $(check-qtest-microblaze-y) +@@ -783,7 +783,7 @@ tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o + tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o + tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) + tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) +-tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ ++#tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ + tests/boot-sector.o tests/acpi-utils.o $(libqos-obj-y) + tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) + tests/microbit-test$(EXESUF): tests/microbit-test.o +-- +2.23.0 diff --git a/tests-document-how-to-update-acpi-tables.patch b/tests-document-how-to-update-acpi-tables.patch new file mode 100644 index 
0000000000000000000000000000000000000000..c961069b6e77c2a193b34d606466f04c7b059611 --- /dev/null +++ b/tests-document-how-to-update-acpi-tables.patch @@ -0,0 +1,53 @@ +From d9642ad522d34f0d803a87654a2c258baf1070dd Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Sat, 5 Oct 2019 17:25:55 -0400 +Subject: [PATCH] tests: document how to update acpi tables + +Looks like no one understands how to do it. +Document the process. + +Signed-off-by: Michael S. Tsirkin +--- + tests/bios-tables-test.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c +index 5e177b7155..d47ee9be99 100644 +--- a/tests/bios-tables-test.c ++++ b/tests/bios-tables-test.c +@@ -10,6 +10,33 @@ + * See the COPYING file in the top-level directory. + */ + ++/* ++ * How to add or update the tests: ++ * Contributor: ++ * 1. add empty files for new tables, if any, under tests/data/acpi ++ * 2. list any changed files in tests/bios-tables-test-allowed-diff.h ++ * 3. commit the above *before* making changes that affect the tables ++ * Maintainer: ++ * After 1-3 above tests will pass but ignore differences with the expected files. ++ * You will also notice that tests/bios-tables-test-allowed-diff.h lists ++ * a bunch of files. This is your hint that you need to do the below: ++ * 4. Run ++ * make check V=1 ++ * this will produce a bunch of warnings about differences ++ * beween actual and expected ACPI tables. If you have IASL installed, ++ * they will also be disassembled so you can look at the disassembled ++ * output. If not - disassemble them yourself in any way you like. ++ * Look at the differences - make sure they make sense and match what the ++ * changes you are merging are supposed to do. ++ * ++ * 5. From build directory, run: ++ * $(SRC_PATH)/tests/data/acpi/rebuild-expected-aml.sh ++ * 6. Now commit any changes. ++ * 7. 
Before doing a pull request, make sure tests/bios-tables-test-allowed-diff.h ++ * is empty - this will ensure following changes to ACPI tables will ++ * be noticed. ++ */ ++ + #include "qemu/osdep.h" + #include + #include "qemu-common.h" +-- +2.19.1 diff --git a/tftp-check-tftp_input-buffer-size.patch b/tftp-check-tftp_input-buffer-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c642f67211af98d470831738cc1cda621461420 --- /dev/null +++ b/tftp-check-tftp_input-buffer-size.patch @@ -0,0 +1,37 @@ +From 968656cf302ba7f8a3dfaf1013f7d8e80663e63e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 4 Jun 2021 16:34:30 +0400 +Subject: [PATCH 5/6] tftp: check tftp_input buffer size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fixes: CVE-2021-3595 +Fixes: https://gitlab.freedesktop.org/slirp/libslirp/-/issues/46 + +Signed-off-by: Marc-André Lureau +Signed-off-by: imxcc +--- + slirp/src/tftp.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +index 2b4176cc..035a0cab 100644 +--- a/slirp/src/tftp.c ++++ b/slirp/src/tftp.c +@@ -449,7 +449,11 @@ static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, + + void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) + { +- struct tftp_t *tp = (struct tftp_t *)m->m_data; ++ struct tftp_t *tp = mtod_check(m, offsetof(struct tftp_t, x.tp_buf)); ++ ++ if (tp == NULL) { ++ return; ++ } + + switch (ntohs(tp->tp_op)) { + case TFTP_RRQ: +-- +2.27.0 + diff --git a/tftp-introduce-a-header-structure.patch b/tftp-introduce-a-header-structure.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f9c7905bdd9578fcee1edb6050e316cf578db4c --- /dev/null +++ b/tftp-introduce-a-header-structure.patch @@ -0,0 +1,252 @@ +From 4330205483be65148e365d968d21efc3f56c4228 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= 
+Date: Fri, 4 Jun 2021 20:01:20 +0400 +Subject: [PATCH 6/6] tftp: introduce a header structure +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Instead of using a composed structure and potentially reading past the +incoming buffer, use a different structure for the header. + +Signed-off-by: Marc-André Lureau +Signed-off-by: imxcc +--- + slirp/src/tftp.c | 61 ++++++++++++++++++++++++------------------------ + slirp/src/tftp.h | 6 ++++- + 2 files changed, 36 insertions(+), 31 deletions(-) + +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +index 035a0cab..ed89e0b5 100644 +--- a/slirp/src/tftp.c ++++ b/slirp/src/tftp.c +@@ -50,7 +50,7 @@ static void tftp_session_terminate(struct tftp_session *spt) + } + + static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, +- struct tftp_t *tp) ++ struct tftphdr *hdr) + { + struct tftp_session *spt; + int k; +@@ -75,7 +75,7 @@ found: + memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); + spt->fd = -1; + spt->block_size = 512; +- spt->client_port = tp->udp.uh_sport; ++ spt->client_port = hdr->udp.uh_sport; + spt->slirp = slirp; + + tftp_session_update(spt); +@@ -84,7 +84,7 @@ found: + } + + static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, +- struct tftp_t *tp) ++ struct tftphdr *hdr) + { + struct tftp_session *spt; + int k; +@@ -94,7 +94,7 @@ static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, + + if (tftp_session_in_use(spt)) { + if (sockaddr_equal(&spt->client_addr, srcsas)) { +- if (spt->client_port == tp->udp.uh_sport) { ++ if (spt->client_port == hdr->udp.uh_sport) { + return k; + } + } +@@ -146,13 +146,13 @@ static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, + } + + static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, +- struct tftp_t *recv_tp) ++ struct tftphdr *hdr) + { + if (spt->client_addr.ss_family == AF_INET6) { + struct sockaddr_in6 sa6, da6; + + 
sa6.sin6_addr = spt->slirp->vhost_addr6; +- sa6.sin6_port = recv_tp->udp.uh_dport; ++ sa6.sin6_port = hdr->udp.uh_dport; + da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; + da6.sin6_port = spt->client_port; + +@@ -161,7 +161,7 @@ static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, + struct sockaddr_in sa4, da4; + + sa4.sin_addr = spt->slirp->vhost_addr; +- sa4.sin_port = recv_tp->udp.uh_dport; ++ sa4.sin_port = hdr->udp.uh_dport; + da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; + da4.sin_port = spt->client_port; + +@@ -183,7 +183,7 @@ static int tftp_send_oack(struct tftp_session *spt, const char *keys[], + + tp = tftp_prep_mbuf_data(spt, m); + +- tp->tp_op = htons(TFTP_OACK); ++ tp->hdr.tp_op = htons(TFTP_OACK); + for (i = 0; i < nb; i++) { + n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", + keys[i]) + +@@ -193,9 +193,8 @@ static int tftp_send_oack(struct tftp_session *spt, const char *keys[], + 1; + } + +- m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + n - +- sizeof(struct udphdr); +- tftp_udp_output(spt, m, recv_tp); ++ m->m_len = G_SIZEOF_MEMBER(struct tftp_t, hdr.tp_op) + n; ++ tftp_udp_output(spt, m, &recv_tp->hdr); + + return 0; + } +@@ -216,21 +215,21 @@ static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, + + tp = tftp_prep_mbuf_data(spt, m); + +- tp->tp_op = htons(TFTP_ERROR); ++ tp->hdr.tp_op = htons(TFTP_ERROR); + tp->x.tp_error.tp_error_code = htons(errorcode); + slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), + msg); + + m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + + strlen(msg) - sizeof(struct udphdr); +- tftp_udp_output(spt, m, recv_tp); ++ tftp_udp_output(spt, m, &recv_tp->hdr); + + out: + tftp_session_terminate(spt); + } + + static void tftp_send_next_block(struct tftp_session *spt, +- struct tftp_t *recv_tp) ++ struct tftphdr *hdr) + { + struct mbuf *m; + struct tftp_t *tp; +@@ -244,7 
+243,7 @@ static void tftp_send_next_block(struct tftp_session *spt, + + tp = tftp_prep_mbuf_data(spt, m); + +- tp->tp_op = htons(TFTP_DATA); ++ tp->hdr.tp_op = htons(TFTP_DATA); + tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); + + nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, +@@ -262,7 +261,7 @@ static void tftp_send_next_block(struct tftp_session *spt, + + m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - + sizeof(struct udphdr); +- tftp_udp_output(spt, m, recv_tp); ++ tftp_udp_output(spt, m, hdr); + + if (nobytes == spt->block_size) { + tftp_session_update(spt); +@@ -285,12 +284,12 @@ static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, + int nb_options = 0; + + /* check if a session already exists and if so terminate it */ +- s = tftp_session_find(slirp, srcsas, tp); ++ s = tftp_session_find(slirp, srcsas, &tp->hdr); + if (s >= 0) { + tftp_session_terminate(&slirp->tftp_sessions[s]); + } + +- s = tftp_session_allocate(slirp, srcsas, tp); ++ s = tftp_session_allocate(slirp, srcsas, &tp->hdr); + + if (s < 0) { + return; +@@ -416,29 +415,29 @@ static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, + } + + spt->block_nr = 0; +- tftp_send_next_block(spt, tp); ++ tftp_send_next_block(spt, &tp->hdr); + } + + static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, +- struct tftp_t *tp, int pktlen) ++ struct tftphdr *hdr) + { + int s; + +- s = tftp_session_find(slirp, srcsas, tp); ++ s = tftp_session_find(slirp, srcsas, hdr); + + if (s < 0) { + return; + } + +- tftp_send_next_block(&slirp->tftp_sessions[s], tp); ++ tftp_send_next_block(&slirp->tftp_sessions[s], hdr); + } + + static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, +- struct tftp_t *tp, int pktlen) ++ struct tftphdr *hdr) + { + int s; + +- s = tftp_session_find(slirp, srcsas, tp); ++ s = tftp_session_find(slirp, srcsas, hdr); + + if (s < 0) { + return; +@@ -449,23 
+448,25 @@ static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, + + void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) + { +- struct tftp_t *tp = mtod_check(m, offsetof(struct tftp_t, x.tp_buf)); ++ struct tftphdr *hdr = mtod_check(m, sizeof(struct tftphdr)); + +- if (tp == NULL) { ++ if (hdr == NULL) { + return; + } + +- switch (ntohs(tp->tp_op)) { ++ switch (ntohs(hdr->tp_op)) { + case TFTP_RRQ: +- tftp_handle_rrq(m->slirp, srcsas, tp, m->m_len); ++ tftp_handle_rrq(m->slirp, srcsas, ++ mtod(m, struct tftp_t *), ++ m->m_len); + break; + + case TFTP_ACK: +- tftp_handle_ack(m->slirp, srcsas, tp, m->m_len); ++ tftp_handle_ack(m->slirp, srcsas, hdr); + break; + + case TFTP_ERROR: +- tftp_handle_error(m->slirp, srcsas, tp, m->m_len); ++ tftp_handle_error(m->slirp, srcsas, hdr); + break; + } + } +diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h +index c47bb43c..021f6cf1 100644 +--- a/slirp/src/tftp.h ++++ b/slirp/src/tftp.h +@@ -18,9 +18,13 @@ + #define TFTP_FILENAME_MAX 512 + #define TFTP_BLOCKSIZE_MAX 1428 + +-struct tftp_t { ++struct tftphdr { + struct udphdr udp; + uint16_t tp_op; ++} SLIRP_PACKED; ++ ++struct tftp_t { ++ struct tftphdr hdr; + union { + struct { + uint16_t tp_block_nr; +-- +2.27.0 + diff --git a/tpm-Add-the-SysBus-TPM-TIS-device.patch b/tpm-Add-the-SysBus-TPM-TIS-device.patch new file mode 100644 index 0000000000000000000000000000000000000000..e0a6254025932eb942b3a15d16b66d4808a33f42 --- /dev/null +++ b/tpm-Add-the-SysBus-TPM-TIS-device.patch @@ -0,0 +1,231 @@ +From 4fe655326eeae322b621dcc25c53af722d2e1afa Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Tue, 11 Aug 2020 11:23:34 +0800 +Subject: [PATCH 14/19] tpm: Add the SysBus TPM TIS device + +Introduce the tpm-tis-device which is a sysbus device +and is bound to be used on ARM. 
+ +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Tested-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Message-id: 20200305165149.618-6-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + hw/tpm/Kconfig | 5 ++ + hw/tpm/Makefile.objs | 1 + + hw/tpm/tpm_tis_sysbus.c | 159 ++++++++++++++++++++++++++++++++++++++++ + include/sysemu/tpm.h | 1 + + 4 files changed, 166 insertions(+) + create mode 100644 hw/tpm/tpm_tis_sysbus.c + +diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig +index 686f8206..4794e7fe 100644 +--- a/hw/tpm/Kconfig ++++ b/hw/tpm/Kconfig +@@ -7,6 +7,11 @@ config TPM_TIS_ISA + depends on TPM && ISA_BUS + select TPM_TIS + ++config TPM_TIS_SYSBUS ++ bool ++ depends on TPM ++ select TPM_TIS ++ + config TPM_TIS + bool + depends on TPM +diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs +index 3ef2036c..f1ec4beb 100644 +--- a/hw/tpm/Makefile.objs ++++ b/hw/tpm/Makefile.objs +@@ -1,6 +1,7 @@ + common-obj-$(CONFIG_TPM) += tpm_util.o + obj-$(call lor,$(CONFIG_TPM_TIS),$(CONFIG_TPM_CRB)) += tpm_ppi.o + common-obj-$(CONFIG_TPM_TIS_ISA) += tpm_tis_isa.o ++common-obj-$(CONFIG_TPM_TIS_SYSBUS) += tpm_tis_sysbus.o + common-obj-$(CONFIG_TPM_TIS) += tpm_tis_common.o + common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o + common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o +diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c +new file mode 100644 +index 00000000..18c02aed +--- /dev/null ++++ b/hw/tpm/tpm_tis_sysbus.c +@@ -0,0 +1,159 @@ ++/* ++ * tpm_tis_sysbus.c - QEMU's TPM TIS SYSBUS Device ++ * ++ * Copyright (C) 2006,2010-2013 IBM Corporation ++ * ++ * Authors: ++ * Stefan Berger ++ * David Safford ++ * ++ * Xen 4 support: Andrease Niederl ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ * ++ * Implementation of the TIS interface according to specs found at ++ * http://www.trustedcomputinggroup.org. 
This implementation currently ++ * supports version 1.3, 21 March 2013 ++ * In the developers menu choose the PC Client section then find the TIS ++ * specification. ++ * ++ * TPM TIS for TPM 2 implementation following TCG PC Client Platform ++ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43 ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/qdev-properties.h" ++#include "migration/vmstate.h" ++#include "tpm_util.h" ++#include "hw/sysbus.h" ++#include "tpm_tis.h" ++ ++typedef struct TPMStateSysBus { ++ /*< private >*/ ++ SysBusDevice parent_obj; ++ ++ /*< public >*/ ++ TPMState state; /* not a QOM object */ ++} TPMStateSysBus; ++ ++#define TPM_TIS_SYSBUS(obj) OBJECT_CHECK(TPMStateSysBus, (obj), TYPE_TPM_TIS_SYSBUS) ++ ++static int tpm_tis_pre_save_sysbus(void *opaque) ++{ ++ TPMStateSysBus *sbdev = opaque; ++ ++ return tpm_tis_pre_save(&sbdev->state); ++} ++ ++static const VMStateDescription vmstate_tpm_tis_sysbus = { ++ .name = "tpm-tis", ++ .version_id = 0, ++ .pre_save = tpm_tis_pre_save_sysbus, ++ .fields = (VMStateField[]) { ++ VMSTATE_BUFFER(state.buffer, TPMStateSysBus), ++ VMSTATE_UINT16(state.rw_offset, TPMStateSysBus), ++ VMSTATE_UINT8(state.active_locty, TPMStateSysBus), ++ VMSTATE_UINT8(state.aborting_locty, TPMStateSysBus), ++ VMSTATE_UINT8(state.next_locty, TPMStateSysBus), ++ ++ VMSTATE_STRUCT_ARRAY(state.loc, TPMStateSysBus, TPM_TIS_NUM_LOCALITIES, ++ 0, vmstate_locty, TPMLocality), ++ ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static void tpm_tis_sysbus_request_completed(TPMIf *ti, int ret) ++{ ++ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(ti); ++ TPMState *s = &sbdev->state; ++ ++ tpm_tis_request_completed(s, ret); ++} ++ ++static enum TPMVersion tpm_tis_sysbus_get_tpm_version(TPMIf *ti) ++{ ++ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(ti); ++ TPMState *s = &sbdev->state; ++ ++ return tpm_tis_get_tpm_version(s); ++} ++ ++static void tpm_tis_sysbus_reset(DeviceState *dev) ++{ ++ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(dev); ++ TPMState *s = 
&sbdev->state; ++ ++ return tpm_tis_reset(s); ++} ++ ++static Property tpm_tis_sysbus_properties[] = { ++ DEFINE_PROP_UINT32("irq", TPMStateSysBus, state.irq_num, TPM_TIS_IRQ), ++ DEFINE_PROP_TPMBE("tpmdev", TPMStateSysBus, state.be_driver), ++ DEFINE_PROP_BOOL("ppi", TPMStateSysBus, state.ppi_enabled, true), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void tpm_tis_sysbus_initfn(Object *obj) ++{ ++ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(obj); ++ TPMState *s = &sbdev->state; ++ ++ memory_region_init_io(&s->mmio, obj, &tpm_tis_memory_ops, ++ s, "tpm-tis-mmio", ++ TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT); ++ ++ sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); ++ sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); ++} ++ ++static void tpm_tis_sysbus_realizefn(DeviceState *dev, Error **errp) ++{ ++ TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(dev); ++ TPMState *s = &sbdev->state; ++ ++ if (!tpm_find()) { ++ error_setg(errp, "at most one TPM device is permitted"); ++ return; ++ } ++ ++ if (!s->be_driver) { ++ error_setg(errp, "'tpmdev' property is required"); ++ return; ++ } ++} ++ ++static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ TPMIfClass *tc = TPM_IF_CLASS(klass); ++ ++ dc->props = tpm_tis_sysbus_properties; ++ dc->vmsd = &vmstate_tpm_tis_sysbus; ++ tc->model = TPM_MODEL_TPM_TIS; ++ dc->realize = tpm_tis_sysbus_realizefn; ++ dc->user_creatable = true; ++ dc->reset = tpm_tis_sysbus_reset; ++ tc->request_completed = tpm_tis_sysbus_request_completed; ++ tc->get_version = tpm_tis_sysbus_get_tpm_version; ++} ++ ++static const TypeInfo tpm_tis_sysbus_info = { ++ .name = TYPE_TPM_TIS_SYSBUS, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = sizeof(TPMStateSysBus), ++ .instance_init = tpm_tis_sysbus_initfn, ++ .class_init = tpm_tis_sysbus_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_TPM_IF }, ++ { } ++ } ++}; ++ ++static void tpm_tis_sysbus_register(void) ++{ ++ 
type_register_static(&tpm_tis_sysbus_info); ++} ++ ++type_init(tpm_tis_sysbus_register) +diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h +index 1691b92c..f37851b1 100644 +--- a/include/sysemu/tpm.h ++++ b/include/sysemu/tpm.h +@@ -44,6 +44,7 @@ typedef struct TPMIfClass { + } TPMIfClass; + + #define TYPE_TPM_TIS_ISA "tpm-tis" ++#define TYPE_TPM_TIS_SYSBUS "tpm-tis-device" + #define TYPE_TPM_CRB "tpm-crb" + #define TYPE_TPM_SPAPR "tpm-spapr" + +-- +2.23.0 + diff --git a/tpm-Move-tpm_tis_show_buffer-to-tpm_util.c.patch b/tpm-Move-tpm_tis_show_buffer-to-tpm_util.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..93139b5e7f8284cecf6faa9930eaa8e802db13d9 --- /dev/null +++ b/tpm-Move-tpm_tis_show_buffer-to-tpm_util.c.patch @@ -0,0 +1,146 @@ +From c6cf45f38cb6e28cf4db42296fedcd5f26ca610b Mon Sep 17 00:00:00 2001 +From: Stefan Berger +Date: Tue, 21 Jan 2020 10:29:30 -0500 +Subject: [PATCH 03/19] tpm: Move tpm_tis_show_buffer to tpm_util.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Stefan Berger +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: David Gibson +Message-Id: <20200121152935.649898-2-stefanb@linux.ibm.com> +Signed-off-by: David Gibson +Signed-off-by: jiangfangjie +--- + hw/tpm/tpm_tis.c | 32 ++++---------------------------- + hw/tpm/tpm_util.c | 25 +++++++++++++++++++++++++ + hw/tpm/tpm_util.h | 3 +++ + hw/tpm/trace-events | 2 +- + 4 files changed, 33 insertions(+), 29 deletions(-) + +diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c +index d6b32128..96a9ac48 100644 +--- a/hw/tpm/tpm_tis.c ++++ b/hw/tpm/tpm_tis.c +@@ -104,30 +104,6 @@ static uint8_t tpm_tis_locality_from_addr(hwaddr addr) + return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7); + } + +-static void tpm_tis_show_buffer(const unsigned char *buffer, +- size_t buffer_size, const char *string) +-{ +- size_t len, i; +- char *line_buffer, *p; +- +- len = MIN(tpm_cmd_get_size(buffer), buffer_size); +- 
+- /* +- * allocate enough room for 3 chars per buffer entry plus a +- * newline after every 16 chars and a final null terminator. +- */ +- line_buffer = g_malloc(len * 3 + (len / 16) + 1); +- +- for (i = 0, p = line_buffer; i < len; i++) { +- if (i && !(i % 16)) { +- p += sprintf(p, "\n"); +- } +- p += sprintf(p, "%.2X ", buffer[i]); +- } +- trace_tpm_tis_show_buffer(string, len, line_buffer); +- +- g_free(line_buffer); +-} + + /* + * Set the given flags in the STS register by clearing the register but +@@ -153,8 +129,8 @@ static void tpm_tis_sts_set(TPMLocality *l, uint32_t flags) + */ + static void tpm_tis_tpm_send(TPMState *s, uint8_t locty) + { +- if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) { +- tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); ++ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { ++ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); + } + + /* +@@ -322,8 +298,8 @@ static void tpm_tis_request_completed(TPMIf *ti, int ret) + s->loc[locty].state = TPM_TIS_STATE_COMPLETION; + s->rw_offset = 0; + +- if (trace_event_get_state_backends(TRACE_TPM_TIS_SHOW_BUFFER)) { +- tpm_tis_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); ++ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { ++ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); + } + + if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) { +diff --git a/hw/tpm/tpm_util.c b/hw/tpm/tpm_util.c +index ee41757e..8643eb50 100644 +--- a/hw/tpm/tpm_util.c ++++ b/hw/tpm/tpm_util.c +@@ -350,3 +350,28 @@ void tpm_sized_buffer_reset(TPMSizedBuffer *tsb) + tsb->buffer = NULL; + tsb->size = 0; + } ++ ++void tpm_util_show_buffer(const unsigned char *buffer, ++ size_t buffer_size, const char *string) ++{ ++ size_t len, i; ++ char *line_buffer, *p; ++ ++ len = MIN(tpm_cmd_get_size(buffer), buffer_size); ++ ++ /* ++ * allocate enough room for 3 chars per buffer entry plus a ++ * newline after every 16 chars and a final null terminator. 
++ */ ++ line_buffer = g_malloc(len * 3 + (len / 16) + 1); ++ ++ for (i = 0, p = line_buffer; i < len; i++) { ++ if (i && !(i % 16)) { ++ p += sprintf(p, "\n"); ++ } ++ p += sprintf(p, "%.2X ", buffer[i]); ++ } ++ trace_tpm_util_show_buffer(string, len, line_buffer); ++ ++ g_free(line_buffer); ++} +diff --git a/hw/tpm/tpm_util.h b/hw/tpm/tpm_util.h +index f397ac21..7889081f 100644 +--- a/hw/tpm/tpm_util.h ++++ b/hw/tpm/tpm_util.h +@@ -79,4 +79,7 @@ typedef struct TPMSizedBuffer { + + void tpm_sized_buffer_reset(TPMSizedBuffer *tsb); + ++void tpm_util_show_buffer(const unsigned char *buffer, ++ size_t buffer_size, const char *string); ++ + #endif /* TPM_TPM_UTIL_H */ +diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events +index 0b94aa15..82c45ee5 100644 +--- a/hw/tpm/trace-events ++++ b/hw/tpm/trace-events +@@ -14,6 +14,7 @@ tpm_util_get_buffer_size_len(uint32_t len, size_t expected) "tpm_resp->len = %u, + tpm_util_get_buffer_size_hdr_len2(uint32_t len, size_t expected) "tpm2_resp->hdr.len = %u, expected = %zu" + tpm_util_get_buffer_size_len2(uint32_t len, size_t expected) "tpm2_resp->len = %u, expected = %zu" + tpm_util_get_buffer_size(size_t len) "buffersize of device: %zu" ++tpm_util_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s" + + # tpm_emulator.c + tpm_emulator_set_locality(uint8_t locty) "setting locality to %d" +@@ -36,7 +37,6 @@ tpm_emulator_pre_save(void) "" + tpm_emulator_inst_init(void) "" + + # tpm_tis.c +-tpm_tis_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\nbuf: %s" + tpm_tis_raise_irq(uint32_t irqmask) "Raising IRQ for flag 0x%08x" + tpm_tis_new_active_locality(uint8_t locty) "Active locality is now %d" + tpm_tis_abort(uint8_t locty) "New active locality is %d" +-- +2.23.0 + diff --git a/tpm-Separate-TPM_TIS-and-TPM_TIS_ISA-configs.patch b/tpm-Separate-TPM_TIS-and-TPM_TIS_ISA-configs.patch new file mode 100644 index 
0000000000000000000000000000000000000000..97dcaa000b251bd8a4390a5c68e75011aef9401f --- /dev/null +++ b/tpm-Separate-TPM_TIS-and-TPM_TIS_ISA-configs.patch @@ -0,0 +1,108 @@ +From 1eca7dbacabbc8ccc737f320839e7800fef5dfa1 Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Tue, 11 Aug 2020 12:42:31 +0800 +Subject: [PATCH 13/19] tpm: Separate TPM_TIS and TPM_TIS_ISA configs + MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 + Content-Transfer-Encoding: 8bit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Let's separate the compilation of tpm_tis_common.c from +the compilation of tpm_tis_isa.c + +The common part will be also compiled along with the +tpm_tis_sysbus device. + +Signed-off-by: Eric Auger +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Message-id: 20200305165149.618-5-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + default-configs/i386-softmmu.mak | 2 +- + hw/i386/Kconfig | 2 +- + hw/tpm/Kconfig | 7 ++++++- + hw/tpm/Makefile.objs | 3 ++- + tests/Makefile.include | 4 ++-- + 5 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak +index cd5ea391..bdeef670 100644 +--- a/default-configs/i386-softmmu.mak ++++ b/default-configs/i386-softmmu.mak +@@ -17,7 +17,7 @@ + #CONFIG_SGA=n + #CONFIG_TEST_DEVICES=n + #CONFIG_TPM_CRB=n +-#CONFIG_TPM_TIS=n ++#CONFIG_TPM_TIS_ISA=n + #CONFIG_VTD=n + + # Boards: +diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig +index 63504380..60334504 100644 +--- a/hw/i386/Kconfig ++++ b/hw/i386/Kconfig +@@ -17,7 +17,7 @@ config PC + imply SGA + imply TEST_DEVICES + imply TPM_CRB +- imply TPM_TIS ++ imply TPM_TIS_ISA + imply VGA_PCI + imply VIRTIO_VGA + select FDC +diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig +index 9e67d990..686f8206 100644 +--- a/hw/tpm/Kconfig ++++ b/hw/tpm/Kconfig +@@ -2,9 +2,14 @@ config TPMDEV + bool + depends 
on TPM + +-config TPM_TIS ++config TPM_TIS_ISA + bool + depends on TPM && ISA_BUS ++ select TPM_TIS ++ ++config TPM_TIS ++ bool ++ depends on TPM + select TPMDEV + + config TPM_CRB +diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs +index fcc4c2f2..3ef2036c 100644 +--- a/hw/tpm/Makefile.objs ++++ b/hw/tpm/Makefile.objs +@@ -1,6 +1,7 @@ + common-obj-$(CONFIG_TPM) += tpm_util.o + obj-$(call lor,$(CONFIG_TPM_TIS),$(CONFIG_TPM_CRB)) += tpm_ppi.o +-common-obj-$(CONFIG_TPM_TIS) += tpm_tis_isa.o tpm_tis_common.o ++common-obj-$(CONFIG_TPM_TIS_ISA) += tpm_tis_isa.o ++common-obj-$(CONFIG_TPM_TIS) += tpm_tis_common.o + common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o + common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o + common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o +diff --git a/tests/Makefile.include b/tests/Makefile.include +index f3273ad3..c151de64 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -190,8 +190,8 @@ check-qtest-i386-y += tests/q35-test$(EXESUF) + check-qtest-i386-y += tests/vmgenid-test$(EXESUF) + check-qtest-i386-$(CONFIG_TPM_CRB) += tests/tpm-crb-swtpm-test$(EXESUF) + check-qtest-i386-$(CONFIG_TPM_CRB) += tests/tpm-crb-test$(EXESUF) +-check-qtest-i386-$(CONFIG_TPM_TIS) += tests/tpm-tis-swtpm-test$(EXESUF) +-check-qtest-i386-$(CONFIG_TPM_TIS) += tests/tpm-tis-test$(EXESUF) ++check-qtest-i386-$(CONFIG_TPM_TIS_ISA) += tests/tpm-tis-swtpm-test$(EXESUF) ++check-qtest-i386-$(CONFIG_TPM_TIS_ISA) += tests/tpm-tis-test$(EXESUF) + check-qtest-i386-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF) + check-qtest-i386-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF) + check-qtest-i386-$(CONFIG_RTL8139_PCI) += tests/test-filter-redirector$(EXESUF) +-- +2.23.0 + diff --git a/tpm-Separate-tpm_tis-common-functions-from-isa-code.patch b/tpm-Separate-tpm_tis-common-functions-from-isa-code.patch new file mode 100644 index 0000000000000000000000000000000000000000..32f180c98d784b1478268a768b4caed6c8a3fa23 --- /dev/null +++ 
b/tpm-Separate-tpm_tis-common-functions-from-isa-code.patch @@ -0,0 +1,1194 @@ +From 425f6bc8392c71d2f29b572d19232785d0ab0b73 Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Tue, 11 Aug 2020 02:55:35 +0000 +Subject: [PATCH 12/19] tpm: Separate tpm_tis common functions from isa code + +Move the device agnostic code into tpm_tis_common.c and +put the ISA device specific code into tpm_tis_isa.c + +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Tested-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Message-id: 20200305165149.618-4-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + hw/tpm/Makefile.objs | 2 +- + hw/tpm/{tpm_tis.c => tpm_tis.c.orig} | 0 + hw/tpm/tpm_tis.h | 91 +++ + hw/tpm/tpm_tis_common.c | 869 +++++++++++++++++++++++++++ + hw/tpm/tpm_tis_isa.c | 170 ++++++ + 5 files changed, 1131 insertions(+), 1 deletion(-) + rename hw/tpm/{tpm_tis.c => tpm_tis.c.orig} (100%) + create mode 100644 hw/tpm/tpm_tis.h + create mode 100644 hw/tpm/tpm_tis_common.c + create mode 100644 hw/tpm/tpm_tis_isa.c + +diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs +index 85eb99ae..fcc4c2f2 100644 +--- a/hw/tpm/Makefile.objs ++++ b/hw/tpm/Makefile.objs +@@ -1,6 +1,6 @@ + common-obj-$(CONFIG_TPM) += tpm_util.o + obj-$(call lor,$(CONFIG_TPM_TIS),$(CONFIG_TPM_CRB)) += tpm_ppi.o +-common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o ++common-obj-$(CONFIG_TPM_TIS) += tpm_tis_isa.o tpm_tis_common.o + common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o + common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o + common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o +diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c.orig +similarity index 100% +rename from hw/tpm/tpm_tis.c +rename to hw/tpm/tpm_tis.c.orig +diff --git a/hw/tpm/tpm_tis.h b/hw/tpm/tpm_tis.h +new file mode 100644 +index 00000000..55549893 +--- /dev/null ++++ b/hw/tpm/tpm_tis.h +@@ -0,0 +1,91 @@ ++/* ++ * tpm_tis.h - QEMU's TPM TIS common header ++ * ++ * Copyright (C) 2006,2010-2013 IBM Corporation 
++ * ++ * Authors: ++ * Stefan Berger ++ * David Safford ++ * ++ * Xen 4 support: Andrease Niederl ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ * ++ * Implementation of the TIS interface according to specs found at ++ * http://www.trustedcomputinggroup.org. This implementation currently ++ * supports version 1.3, 21 March 2013 ++ * In the developers menu choose the PC Client section then find the TIS ++ * specification. ++ * ++ * TPM TIS for TPM 2 implementation following TCG PC Client Platform ++ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43 ++ */ ++#ifndef TPM_TPM_TIS_H ++#define TPM_TPM_TIS_H ++ ++#include "qemu/osdep.h" ++#include "sysemu/tpm_backend.h" ++#include "tpm_ppi.h" ++ ++#define TPM_TIS_NUM_LOCALITIES 5 /* per spec */ ++#define TPM_TIS_LOCALITY_SHIFT 12 ++#define TPM_TIS_NO_LOCALITY 0xff ++ ++#define TPM_TIS_IS_VALID_LOCTY(x) ((x) < TPM_TIS_NUM_LOCALITIES) ++ ++#define TPM_TIS_BUFFER_MAX 4096 ++ ++typedef enum { ++ TPM_TIS_STATE_IDLE = 0, ++ TPM_TIS_STATE_READY, ++ TPM_TIS_STATE_COMPLETION, ++ TPM_TIS_STATE_EXECUTION, ++ TPM_TIS_STATE_RECEPTION, ++} TPMTISState; ++ ++/* locality data -- all fields are persisted */ ++typedef struct TPMLocality { ++ TPMTISState state; ++ uint8_t access; ++ uint32_t sts; ++ uint32_t iface_id; ++ uint32_t inte; ++ uint32_t ints; ++} TPMLocality; ++ ++typedef struct TPMState { ++ MemoryRegion mmio; ++ ++ unsigned char buffer[TPM_TIS_BUFFER_MAX]; ++ uint16_t rw_offset; ++ ++ uint8_t active_locty; ++ uint8_t aborting_locty; ++ uint8_t next_locty; ++ ++ TPMLocality loc[TPM_TIS_NUM_LOCALITIES]; ++ ++ qemu_irq irq; ++ uint32_t irq_num; ++ ++ TPMBackendCmd cmd; ++ ++ TPMBackend *be_driver; ++ TPMVersion be_tpm_version; ++ ++ size_t be_buffer_size; ++ ++ bool ppi_enabled; ++ TPMPPI ppi; ++} TPMState; ++ ++extern const VMStateDescription vmstate_locty; ++extern const MemoryRegionOps tpm_tis_memory_ops; ++ ++int 
tpm_tis_pre_save(TPMState *s); ++void tpm_tis_reset(TPMState *s); ++enum TPMVersion tpm_tis_get_tpm_version(TPMState *s); ++void tpm_tis_request_completed(TPMState *s, int ret); ++ ++#endif /* TPM_TPM_TIS_H */ +diff --git a/hw/tpm/tpm_tis_common.c b/hw/tpm/tpm_tis_common.c +new file mode 100644 +index 00000000..9a51c71e +--- /dev/null ++++ b/hw/tpm/tpm_tis_common.c +@@ -0,0 +1,869 @@ ++/* ++ * tpm_tis_common.c - QEMU's TPM TIS interface emulator ++ * device agnostic functions ++ * ++ * Copyright (C) 2006,2010-2013 IBM Corporation ++ * ++ * Authors: ++ * Stefan Berger ++ * David Safford ++ * ++ * Xen 4 support: Andrease Niederl ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ * ++ * Implementation of the TIS interface according to specs found at ++ * http://www.trustedcomputinggroup.org. This implementation currently ++ * supports version 1.3, 21 March 2013 ++ * In the developers menu choose the PC Client section then find the TIS ++ * specification. ++ * ++ * TPM TIS for TPM 2 implementation following TCG PC Client Platform ++ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43 ++ */ ++#include "qemu/osdep.h" ++#include "hw/isa/isa.h" ++#include "qapi/error.h" ++#include "qemu/module.h" ++ ++#include "hw/acpi/tpm.h" ++#include "hw/pci/pci_ids.h" ++#include "sysemu/tpm_backend.h" ++#include "tpm_int.h" ++#include "tpm_util.h" ++#include "tpm_ppi.h" ++#include "trace.h" ++ ++#include "tpm_tis.h" ++ ++#define DEBUG_TIS 0 ++ ++/* local prototypes */ ++ ++static uint64_t tpm_tis_mmio_read(void *opaque, hwaddr addr, ++ unsigned size); ++ ++/* utility functions */ ++ ++static uint8_t tpm_tis_locality_from_addr(hwaddr addr) ++{ ++ return (uint8_t)((addr >> TPM_TIS_LOCALITY_SHIFT) & 0x7); ++} ++ ++ ++/* ++ * Set the given flags in the STS register by clearing the register but ++ * preserving the SELFTEST_DONE and TPM_FAMILY_MASK flags and then setting ++ * the new flags. 
++ * ++ * The SELFTEST_DONE flag is acquired from the backend that determines it by ++ * peeking into TPM commands. ++ * ++ * A VM suspend/resume will preserve the flag by storing it into the VM ++ * device state, but the backend will not remember it when QEMU is started ++ * again. Therefore, we cache the flag here. Once set, it will not be unset ++ * except by a reset. ++ */ ++static void tpm_tis_sts_set(TPMLocality *l, uint32_t flags) ++{ ++ l->sts &= TPM_TIS_STS_SELFTEST_DONE | TPM_TIS_STS_TPM_FAMILY_MASK; ++ l->sts |= flags; ++} ++ ++/* ++ * Send a request to the TPM. ++ */ ++static void tpm_tis_tpm_send(TPMState *s, uint8_t locty) ++{ ++ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { ++ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); ++ } ++ ++ /* ++ * rw_offset serves as length indicator for length of data; ++ * it's reset when the response comes back ++ */ ++ s->loc[locty].state = TPM_TIS_STATE_EXECUTION; ++ ++ s->cmd = (TPMBackendCmd) { ++ .locty = locty, ++ .in = s->buffer, ++ .in_len = s->rw_offset, ++ .out = s->buffer, ++ .out_len = s->be_buffer_size, ++ }; ++ ++ tpm_backend_deliver_request(s->be_driver, &s->cmd); ++} ++ ++/* raise an interrupt if allowed */ ++static void tpm_tis_raise_irq(TPMState *s, uint8_t locty, uint32_t irqmask) ++{ ++ if (!TPM_TIS_IS_VALID_LOCTY(locty)) { ++ return; ++ } ++ ++ if ((s->loc[locty].inte & TPM_TIS_INT_ENABLED) && ++ (s->loc[locty].inte & irqmask)) { ++ trace_tpm_tis_raise_irq(irqmask); ++ qemu_irq_raise(s->irq); ++ s->loc[locty].ints |= irqmask; ++ } ++} ++ ++static uint32_t tpm_tis_check_request_use_except(TPMState *s, uint8_t locty) ++{ ++ uint8_t l; ++ ++ for (l = 0; l < TPM_TIS_NUM_LOCALITIES; l++) { ++ if (l == locty) { ++ continue; ++ } ++ if ((s->loc[l].access & TPM_TIS_ACCESS_REQUEST_USE)) { ++ return 1; ++ } ++ } ++ ++ return 0; ++} ++ ++static void tpm_tis_new_active_locality(TPMState *s, uint8_t new_active_locty) ++{ ++ bool change = (s->active_locty != new_active_locty); 
++ bool is_seize; ++ uint8_t mask; ++ ++ if (change && TPM_TIS_IS_VALID_LOCTY(s->active_locty)) { ++ is_seize = TPM_TIS_IS_VALID_LOCTY(new_active_locty) && ++ s->loc[new_active_locty].access & TPM_TIS_ACCESS_SEIZE; ++ ++ if (is_seize) { ++ mask = ~(TPM_TIS_ACCESS_ACTIVE_LOCALITY); ++ } else { ++ mask = ~(TPM_TIS_ACCESS_ACTIVE_LOCALITY| ++ TPM_TIS_ACCESS_REQUEST_USE); ++ } ++ /* reset flags on the old active locality */ ++ s->loc[s->active_locty].access &= mask; ++ ++ if (is_seize) { ++ s->loc[s->active_locty].access |= TPM_TIS_ACCESS_BEEN_SEIZED; ++ } ++ } ++ ++ s->active_locty = new_active_locty; ++ ++ trace_tpm_tis_new_active_locality(s->active_locty); ++ ++ if (TPM_TIS_IS_VALID_LOCTY(new_active_locty)) { ++ /* set flags on the new active locality */ ++ s->loc[new_active_locty].access |= TPM_TIS_ACCESS_ACTIVE_LOCALITY; ++ s->loc[new_active_locty].access &= ~(TPM_TIS_ACCESS_REQUEST_USE | ++ TPM_TIS_ACCESS_SEIZE); ++ } ++ ++ if (change) { ++ tpm_tis_raise_irq(s, s->active_locty, TPM_TIS_INT_LOCALITY_CHANGED); ++ } ++} ++ ++/* abort -- this function switches the locality */ ++static void tpm_tis_abort(TPMState *s) ++{ ++ s->rw_offset = 0; ++ ++ trace_tpm_tis_abort(s->next_locty); ++ ++ /* ++ * Need to react differently depending on who's aborting now and ++ * which locality will become active afterwards. 
++ */ ++ if (s->aborting_locty == s->next_locty) { ++ s->loc[s->aborting_locty].state = TPM_TIS_STATE_READY; ++ tpm_tis_sts_set(&s->loc[s->aborting_locty], ++ TPM_TIS_STS_COMMAND_READY); ++ tpm_tis_raise_irq(s, s->aborting_locty, TPM_TIS_INT_COMMAND_READY); ++ } ++ ++ /* locality after abort is another one than the current one */ ++ tpm_tis_new_active_locality(s, s->next_locty); ++ ++ s->next_locty = TPM_TIS_NO_LOCALITY; ++ /* nobody's aborting a command anymore */ ++ s->aborting_locty = TPM_TIS_NO_LOCALITY; ++} ++ ++/* prepare aborting current command */ ++static void tpm_tis_prep_abort(TPMState *s, uint8_t locty, uint8_t newlocty) ++{ ++ uint8_t busy_locty; ++ ++ assert(TPM_TIS_IS_VALID_LOCTY(newlocty)); ++ ++ s->aborting_locty = locty; /* may also be TPM_TIS_NO_LOCALITY */ ++ s->next_locty = newlocty; /* locality after successful abort */ ++ ++ /* ++ * only abort a command using an interrupt if currently executing ++ * a command AND if there's a valid connection to the vTPM. ++ */ ++ for (busy_locty = 0; busy_locty < TPM_TIS_NUM_LOCALITIES; busy_locty++) { ++ if (s->loc[busy_locty].state == TPM_TIS_STATE_EXECUTION) { ++ /* ++ * request the backend to cancel. Some backends may not ++ * support it ++ */ ++ tpm_backend_cancel_cmd(s->be_driver); ++ return; ++ } ++ } ++ ++ tpm_tis_abort(s); ++} ++ ++/* ++ * Callback from the TPM to indicate that the response was received. 
++ */ ++void tpm_tis_request_completed(TPMState *s, int ret) ++{ ++ uint8_t locty = s->cmd.locty; ++ uint8_t l; ++ ++ assert(TPM_TIS_IS_VALID_LOCTY(locty)); ++ ++ if (s->cmd.selftest_done) { ++ for (l = 0; l < TPM_TIS_NUM_LOCALITIES; l++) { ++ s->loc[l].sts |= TPM_TIS_STS_SELFTEST_DONE; ++ } ++ } ++ ++ /* FIXME: report error if ret != 0 */ ++ tpm_tis_sts_set(&s->loc[locty], ++ TPM_TIS_STS_VALID | TPM_TIS_STS_DATA_AVAILABLE); ++ s->loc[locty].state = TPM_TIS_STATE_COMPLETION; ++ s->rw_offset = 0; ++ ++ if (trace_event_get_state_backends(TRACE_TPM_UTIL_SHOW_BUFFER)) { ++ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "From TPM"); ++ } ++ ++ if (TPM_TIS_IS_VALID_LOCTY(s->next_locty)) { ++ tpm_tis_abort(s); ++ } ++ ++ tpm_tis_raise_irq(s, locty, ++ TPM_TIS_INT_DATA_AVAILABLE | TPM_TIS_INT_STS_VALID); ++} ++ ++/* ++ * Read a byte of response data ++ */ ++static uint32_t tpm_tis_data_read(TPMState *s, uint8_t locty) ++{ ++ uint32_t ret = TPM_TIS_NO_DATA_BYTE; ++ uint16_t len; ++ ++ if ((s->loc[locty].sts & TPM_TIS_STS_DATA_AVAILABLE)) { ++ len = MIN(tpm_cmd_get_size(&s->buffer), ++ s->be_buffer_size); ++ ++ ret = s->buffer[s->rw_offset++]; ++ if (s->rw_offset >= len) { ++ /* got last byte */ ++ tpm_tis_sts_set(&s->loc[locty], TPM_TIS_STS_VALID); ++ tpm_tis_raise_irq(s, locty, TPM_TIS_INT_STS_VALID); ++ } ++ trace_tpm_tis_data_read(ret, s->rw_offset - 1); ++ } ++ ++ return ret; ++} ++ ++#ifdef DEBUG_TIS ++static void tpm_tis_dump_state(TPMState *s, hwaddr addr) ++{ ++ static const unsigned regs[] = { ++ TPM_TIS_REG_ACCESS, ++ TPM_TIS_REG_INT_ENABLE, ++ TPM_TIS_REG_INT_VECTOR, ++ TPM_TIS_REG_INT_STATUS, ++ TPM_TIS_REG_INTF_CAPABILITY, ++ TPM_TIS_REG_STS, ++ TPM_TIS_REG_DID_VID, ++ TPM_TIS_REG_RID, ++ 0xfff}; ++ int idx; ++ uint8_t locty = tpm_tis_locality_from_addr(addr); ++ hwaddr base = addr & ~0xfff; ++ ++ printf("tpm_tis: active locality : %d\n" ++ "tpm_tis: state of locality %d : %d\n" ++ "tpm_tis: register dump:\n", ++ s->active_locty, ++ locty, 
s->loc[locty].state); ++ ++ for (idx = 0; regs[idx] != 0xfff; idx++) { ++ printf("tpm_tis: 0x%04x : 0x%08x\n", regs[idx], ++ (int)tpm_tis_mmio_read(s, base + regs[idx], 4)); ++ } ++ ++ printf("tpm_tis: r/w offset : %d\n" ++ "tpm_tis: result buffer : ", ++ s->rw_offset); ++ for (idx = 0; ++ idx < MIN(tpm_cmd_get_size(&s->buffer), s->be_buffer_size); ++ idx++) { ++ printf("%c%02x%s", ++ s->rw_offset == idx ? '>' : ' ', ++ s->buffer[idx], ++ ((idx & 0xf) == 0xf) ? "\ntpm_tis: " : ""); ++ } ++ printf("\n"); ++} ++#endif ++ ++/* ++ * Read a register of the TIS interface ++ * See specs pages 33-63 for description of the registers ++ */ ++static uint64_t tpm_tis_mmio_read(void *opaque, hwaddr addr, ++ unsigned size) ++{ ++ TPMState *s = opaque; ++ uint16_t offset = addr & 0xffc; ++ uint8_t shift = (addr & 0x3) * 8; ++ uint32_t val = 0xffffffff; ++ uint8_t locty = tpm_tis_locality_from_addr(addr); ++ uint32_t avail; ++ uint8_t v; ++ ++ if (tpm_backend_had_startup_error(s->be_driver)) { ++ return 0; ++ } ++ ++ switch (offset) { ++ case TPM_TIS_REG_ACCESS: ++ /* never show the SEIZE flag even though we use it internally */ ++ val = s->loc[locty].access & ~TPM_TIS_ACCESS_SEIZE; ++ /* the pending flag is always calculated */ ++ if (tpm_tis_check_request_use_except(s, locty)) { ++ val |= TPM_TIS_ACCESS_PENDING_REQUEST; ++ } ++ val |= !tpm_backend_get_tpm_established_flag(s->be_driver); ++ break; ++ case TPM_TIS_REG_INT_ENABLE: ++ val = s->loc[locty].inte; ++ break; ++ case TPM_TIS_REG_INT_VECTOR: ++ val = s->irq_num; ++ break; ++ case TPM_TIS_REG_INT_STATUS: ++ val = s->loc[locty].ints; ++ break; ++ case TPM_TIS_REG_INTF_CAPABILITY: ++ switch (s->be_tpm_version) { ++ case TPM_VERSION_UNSPEC: ++ val = 0; ++ break; ++ case TPM_VERSION_1_2: ++ val = TPM_TIS_CAPABILITIES_SUPPORTED1_3; ++ break; ++ case TPM_VERSION_2_0: ++ val = TPM_TIS_CAPABILITIES_SUPPORTED2_0; ++ break; ++ } ++ break; ++ case TPM_TIS_REG_STS: ++ if (s->active_locty == locty) { ++ if ((s->loc[locty].sts & 
TPM_TIS_STS_DATA_AVAILABLE)) { ++ val = TPM_TIS_BURST_COUNT( ++ MIN(tpm_cmd_get_size(&s->buffer), ++ s->be_buffer_size) ++ - s->rw_offset) | s->loc[locty].sts; ++ } else { ++ avail = s->be_buffer_size - s->rw_offset; ++ /* ++ * byte-sized reads should not return 0x00 for 0x100 ++ * available bytes. ++ */ ++ if (size == 1 && avail > 0xff) { ++ avail = 0xff; ++ } ++ val = TPM_TIS_BURST_COUNT(avail) | s->loc[locty].sts; ++ } ++ } ++ break; ++ case TPM_TIS_REG_DATA_FIFO: ++ case TPM_TIS_REG_DATA_XFIFO ... TPM_TIS_REG_DATA_XFIFO_END: ++ if (s->active_locty == locty) { ++ if (size > 4 - (addr & 0x3)) { ++ /* prevent access beyond FIFO */ ++ size = 4 - (addr & 0x3); ++ } ++ val = 0; ++ shift = 0; ++ while (size > 0) { ++ switch (s->loc[locty].state) { ++ case TPM_TIS_STATE_COMPLETION: ++ v = tpm_tis_data_read(s, locty); ++ break; ++ default: ++ v = TPM_TIS_NO_DATA_BYTE; ++ break; ++ } ++ val |= (v << shift); ++ shift += 8; ++ size--; ++ } ++ shift = 0; /* no more adjustments */ ++ } ++ break; ++ case TPM_TIS_REG_INTERFACE_ID: ++ val = s->loc[locty].iface_id; ++ break; ++ case TPM_TIS_REG_DID_VID: ++ val = (TPM_TIS_TPM_DID << 16) | TPM_TIS_TPM_VID; ++ break; ++ case TPM_TIS_REG_RID: ++ val = TPM_TIS_TPM_RID; ++ break; ++#ifdef DEBUG_TIS ++ case TPM_TIS_REG_DEBUG: ++ tpm_tis_dump_state(s, addr); ++ break; ++#endif ++ } ++ ++ if (shift) { ++ val >>= shift; ++ } ++ ++ trace_tpm_tis_mmio_read(size, addr, val); ++ ++ return val; ++} ++ ++/* ++ * Write a value to a register of the TIS interface ++ * See specs pages 33-63 for description of the registers ++ */ ++static void tpm_tis_mmio_write(void *opaque, hwaddr addr, ++ uint64_t val, unsigned size) ++{ ++ TPMState *s = opaque; ++ uint16_t off = addr & 0xffc; ++ uint8_t shift = (addr & 0x3) * 8; ++ uint8_t locty = tpm_tis_locality_from_addr(addr); ++ uint8_t active_locty, l; ++ int c, set_new_locty = 1; ++ uint16_t len; ++ uint32_t mask = (size == 1) ? 0xff : ((size == 2) ? 
0xffff : ~0); ++ ++ trace_tpm_tis_mmio_write(size, addr, val); ++ ++ if (locty == 4) { ++ trace_tpm_tis_mmio_write_locty4(); ++ return; ++ } ++ ++ if (tpm_backend_had_startup_error(s->be_driver)) { ++ return; ++ } ++ ++ val &= mask; ++ ++ if (shift) { ++ val <<= shift; ++ mask <<= shift; ++ } ++ ++ mask ^= 0xffffffff; ++ ++ switch (off) { ++ case TPM_TIS_REG_ACCESS: ++ ++ if ((val & TPM_TIS_ACCESS_SEIZE)) { ++ val &= ~(TPM_TIS_ACCESS_REQUEST_USE | ++ TPM_TIS_ACCESS_ACTIVE_LOCALITY); ++ } ++ ++ active_locty = s->active_locty; ++ ++ if ((val & TPM_TIS_ACCESS_ACTIVE_LOCALITY)) { ++ /* give up locality if currently owned */ ++ if (s->active_locty == locty) { ++ trace_tpm_tis_mmio_write_release_locty(locty); ++ ++ uint8_t newlocty = TPM_TIS_NO_LOCALITY; ++ /* anybody wants the locality ? */ ++ for (c = TPM_TIS_NUM_LOCALITIES - 1; c >= 0; c--) { ++ if ((s->loc[c].access & TPM_TIS_ACCESS_REQUEST_USE)) { ++ trace_tpm_tis_mmio_write_locty_req_use(c); ++ newlocty = c; ++ break; ++ } ++ } ++ trace_tpm_tis_mmio_write_next_locty(newlocty); ++ ++ if (TPM_TIS_IS_VALID_LOCTY(newlocty)) { ++ set_new_locty = 0; ++ tpm_tis_prep_abort(s, locty, newlocty); ++ } else { ++ active_locty = TPM_TIS_NO_LOCALITY; ++ } ++ } else { ++ /* not currently the owner; clear a pending request */ ++ s->loc[locty].access &= ~TPM_TIS_ACCESS_REQUEST_USE; ++ } ++ } ++ ++ if ((val & TPM_TIS_ACCESS_BEEN_SEIZED)) { ++ s->loc[locty].access &= ~TPM_TIS_ACCESS_BEEN_SEIZED; ++ } ++ ++ if ((val & TPM_TIS_ACCESS_SEIZE)) { ++ /* ++ * allow seize if a locality is active and the requesting ++ * locality is higher than the one that's active ++ * OR ++ * allow seize for requesting locality if no locality is ++ * active ++ */ ++ while ((TPM_TIS_IS_VALID_LOCTY(s->active_locty) && ++ locty > s->active_locty) || ++ !TPM_TIS_IS_VALID_LOCTY(s->active_locty)) { ++ bool higher_seize = FALSE; ++ ++ /* already a pending SEIZE ? 
*/ ++ if ((s->loc[locty].access & TPM_TIS_ACCESS_SEIZE)) { ++ break; ++ } ++ ++ /* check for ongoing seize by a higher locality */ ++ for (l = locty + 1; l < TPM_TIS_NUM_LOCALITIES; l++) { ++ if ((s->loc[l].access & TPM_TIS_ACCESS_SEIZE)) { ++ higher_seize = TRUE; ++ break; ++ } ++ } ++ ++ if (higher_seize) { ++ break; ++ } ++ ++ /* cancel any seize by a lower locality */ ++ for (l = 0; l < locty; l++) { ++ s->loc[l].access &= ~TPM_TIS_ACCESS_SEIZE; ++ } ++ ++ s->loc[locty].access |= TPM_TIS_ACCESS_SEIZE; ++ ++ trace_tpm_tis_mmio_write_locty_seized(locty, s->active_locty); ++ trace_tpm_tis_mmio_write_init_abort(); ++ ++ set_new_locty = 0; ++ tpm_tis_prep_abort(s, s->active_locty, locty); ++ break; ++ } ++ } ++ ++ if ((val & TPM_TIS_ACCESS_REQUEST_USE)) { ++ if (s->active_locty != locty) { ++ if (TPM_TIS_IS_VALID_LOCTY(s->active_locty)) { ++ s->loc[locty].access |= TPM_TIS_ACCESS_REQUEST_USE; ++ } else { ++ /* no locality active -> make this one active now */ ++ active_locty = locty; ++ } ++ } ++ } ++ ++ if (set_new_locty) { ++ tpm_tis_new_active_locality(s, active_locty); ++ } ++ ++ break; ++ case TPM_TIS_REG_INT_ENABLE: ++ if (s->active_locty != locty) { ++ break; ++ } ++ ++ s->loc[locty].inte &= mask; ++ s->loc[locty].inte |= (val & (TPM_TIS_INT_ENABLED | ++ TPM_TIS_INT_POLARITY_MASK | ++ TPM_TIS_INTERRUPTS_SUPPORTED)); ++ break; ++ case TPM_TIS_REG_INT_VECTOR: ++ /* hard wired -- ignore */ ++ break; ++ case TPM_TIS_REG_INT_STATUS: ++ if (s->active_locty != locty) { ++ break; ++ } ++ ++ /* clearing of interrupt flags */ ++ if (((val & TPM_TIS_INTERRUPTS_SUPPORTED)) && ++ (s->loc[locty].ints & TPM_TIS_INTERRUPTS_SUPPORTED)) { ++ s->loc[locty].ints &= ~val; ++ if (s->loc[locty].ints == 0) { ++ qemu_irq_lower(s->irq); ++ trace_tpm_tis_mmio_write_lowering_irq(); ++ } ++ } ++ s->loc[locty].ints &= ~(val & TPM_TIS_INTERRUPTS_SUPPORTED); ++ break; ++ case TPM_TIS_REG_STS: ++ if (s->active_locty != locty) { ++ break; ++ } ++ ++ if (s->be_tpm_version == TPM_VERSION_2_0) { 
++ /* some flags that are only supported for TPM 2 */ ++ if (val & TPM_TIS_STS_COMMAND_CANCEL) { ++ if (s->loc[locty].state == TPM_TIS_STATE_EXECUTION) { ++ /* ++ * request the backend to cancel. Some backends may not ++ * support it ++ */ ++ tpm_backend_cancel_cmd(s->be_driver); ++ } ++ } ++ ++ if (val & TPM_TIS_STS_RESET_ESTABLISHMENT_BIT) { ++ if (locty == 3 || locty == 4) { ++ tpm_backend_reset_tpm_established_flag(s->be_driver, locty); ++ } ++ } ++ } ++ ++ val &= (TPM_TIS_STS_COMMAND_READY | TPM_TIS_STS_TPM_GO | ++ TPM_TIS_STS_RESPONSE_RETRY); ++ ++ if (val == TPM_TIS_STS_COMMAND_READY) { ++ switch (s->loc[locty].state) { ++ ++ case TPM_TIS_STATE_READY: ++ s->rw_offset = 0; ++ break; ++ ++ case TPM_TIS_STATE_IDLE: ++ tpm_tis_sts_set(&s->loc[locty], TPM_TIS_STS_COMMAND_READY); ++ s->loc[locty].state = TPM_TIS_STATE_READY; ++ tpm_tis_raise_irq(s, locty, TPM_TIS_INT_COMMAND_READY); ++ break; ++ ++ case TPM_TIS_STATE_EXECUTION: ++ case TPM_TIS_STATE_RECEPTION: ++ /* abort currently running command */ ++ trace_tpm_tis_mmio_write_init_abort(); ++ tpm_tis_prep_abort(s, locty, locty); ++ break; ++ ++ case TPM_TIS_STATE_COMPLETION: ++ s->rw_offset = 0; ++ /* shortcut to ready state with C/R set */ ++ s->loc[locty].state = TPM_TIS_STATE_READY; ++ if (!(s->loc[locty].sts & TPM_TIS_STS_COMMAND_READY)) { ++ tpm_tis_sts_set(&s->loc[locty], ++ TPM_TIS_STS_COMMAND_READY); ++ tpm_tis_raise_irq(s, locty, TPM_TIS_INT_COMMAND_READY); ++ } ++ s->loc[locty].sts &= ~(TPM_TIS_STS_DATA_AVAILABLE); ++ break; ++ ++ } ++ } else if (val == TPM_TIS_STS_TPM_GO) { ++ switch (s->loc[locty].state) { ++ case TPM_TIS_STATE_RECEPTION: ++ if ((s->loc[locty].sts & TPM_TIS_STS_EXPECT) == 0) { ++ tpm_tis_tpm_send(s, locty); ++ } ++ break; ++ default: ++ /* ignore */ ++ break; ++ } ++ } else if (val == TPM_TIS_STS_RESPONSE_RETRY) { ++ switch (s->loc[locty].state) { ++ case TPM_TIS_STATE_COMPLETION: ++ s->rw_offset = 0; ++ tpm_tis_sts_set(&s->loc[locty], ++ TPM_TIS_STS_VALID| ++ 
TPM_TIS_STS_DATA_AVAILABLE); ++ break; ++ default: ++ /* ignore */ ++ break; ++ } ++ } ++ break; ++ case TPM_TIS_REG_DATA_FIFO: ++ case TPM_TIS_REG_DATA_XFIFO ... TPM_TIS_REG_DATA_XFIFO_END: ++ /* data fifo */ ++ if (s->active_locty != locty) { ++ break; ++ } ++ ++ if (s->loc[locty].state == TPM_TIS_STATE_IDLE || ++ s->loc[locty].state == TPM_TIS_STATE_EXECUTION || ++ s->loc[locty].state == TPM_TIS_STATE_COMPLETION) { ++ /* drop the byte */ ++ } else { ++ trace_tpm_tis_mmio_write_data2send(val, size); ++ if (s->loc[locty].state == TPM_TIS_STATE_READY) { ++ s->loc[locty].state = TPM_TIS_STATE_RECEPTION; ++ tpm_tis_sts_set(&s->loc[locty], ++ TPM_TIS_STS_EXPECT | TPM_TIS_STS_VALID); ++ } ++ ++ val >>= shift; ++ if (size > 4 - (addr & 0x3)) { ++ /* prevent access beyond FIFO */ ++ size = 4 - (addr & 0x3); ++ } ++ ++ while ((s->loc[locty].sts & TPM_TIS_STS_EXPECT) && size > 0) { ++ if (s->rw_offset < s->be_buffer_size) { ++ s->buffer[s->rw_offset++] = ++ (uint8_t)val; ++ val >>= 8; ++ size--; ++ } else { ++ tpm_tis_sts_set(&s->loc[locty], TPM_TIS_STS_VALID); ++ } ++ } ++ ++ /* check for complete packet */ ++ if (s->rw_offset > 5 && ++ (s->loc[locty].sts & TPM_TIS_STS_EXPECT)) { ++ /* we have a packet length - see if we have all of it */ ++ bool need_irq = !(s->loc[locty].sts & TPM_TIS_STS_VALID); ++ ++ len = tpm_cmd_get_size(&s->buffer); ++ if (len > s->rw_offset) { ++ tpm_tis_sts_set(&s->loc[locty], ++ TPM_TIS_STS_EXPECT | TPM_TIS_STS_VALID); ++ } else { ++ /* packet complete */ ++ tpm_tis_sts_set(&s->loc[locty], TPM_TIS_STS_VALID); ++ } ++ if (need_irq) { ++ tpm_tis_raise_irq(s, locty, TPM_TIS_INT_STS_VALID); ++ } ++ } ++ } ++ break; ++ case TPM_TIS_REG_INTERFACE_ID: ++ if (val & TPM_TIS_IFACE_ID_INT_SEL_LOCK) { ++ for (l = 0; l < TPM_TIS_NUM_LOCALITIES; l++) { ++ s->loc[l].iface_id |= TPM_TIS_IFACE_ID_INT_SEL_LOCK; ++ } ++ } ++ break; ++ } ++} ++ ++const MemoryRegionOps tpm_tis_memory_ops = { ++ .read = tpm_tis_mmio_read, ++ .write = tpm_tis_mmio_write, ++ 
.endianness = DEVICE_LITTLE_ENDIAN, ++ .valid = { ++ .min_access_size = 1, ++ .max_access_size = 4, ++ }, ++}; ++ ++/* ++ * Get the TPMVersion of the backend device being used ++ */ ++enum TPMVersion tpm_tis_get_tpm_version(TPMState *s) ++{ ++ if (tpm_backend_had_startup_error(s->be_driver)) { ++ return TPM_VERSION_UNSPEC; ++ } ++ ++ return tpm_backend_get_tpm_version(s->be_driver); ++} ++ ++/* ++ * This function is called when the machine starts, resets or due to ++ * S3 resume. ++ */ ++void tpm_tis_reset(TPMState *s) ++{ ++ int c; ++ ++ s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); ++ s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver), ++ TPM_TIS_BUFFER_MAX); ++ ++ if (s->ppi_enabled) { ++ tpm_ppi_reset(&s->ppi); ++ } ++ tpm_backend_reset(s->be_driver); ++ ++ s->active_locty = TPM_TIS_NO_LOCALITY; ++ s->next_locty = TPM_TIS_NO_LOCALITY; ++ s->aborting_locty = TPM_TIS_NO_LOCALITY; ++ ++ for (c = 0; c < TPM_TIS_NUM_LOCALITIES; c++) { ++ s->loc[c].access = TPM_TIS_ACCESS_TPM_REG_VALID_STS; ++ switch (s->be_tpm_version) { ++ case TPM_VERSION_UNSPEC: ++ break; ++ case TPM_VERSION_1_2: ++ s->loc[c].sts = TPM_TIS_STS_TPM_FAMILY1_2; ++ s->loc[c].iface_id = TPM_TIS_IFACE_ID_SUPPORTED_FLAGS1_3; ++ break; ++ case TPM_VERSION_2_0: ++ s->loc[c].sts = TPM_TIS_STS_TPM_FAMILY2_0; ++ s->loc[c].iface_id = TPM_TIS_IFACE_ID_SUPPORTED_FLAGS2_0; ++ break; ++ } ++ s->loc[c].inte = TPM_TIS_INT_POLARITY_LOW_LEVEL; ++ s->loc[c].ints = 0; ++ s->loc[c].state = TPM_TIS_STATE_IDLE; ++ ++ s->rw_offset = 0; ++ } ++ ++ if (tpm_backend_startup_tpm(s->be_driver, s->be_buffer_size) < 0) { ++ exit(1); ++ } ++} ++ ++/* persistent state handling */ ++ ++int tpm_tis_pre_save(TPMState *s) ++{ ++ uint8_t locty = s->active_locty; ++ ++ trace_tpm_tis_pre_save(locty, s->rw_offset); ++ ++ if (DEBUG_TIS) { ++ tpm_tis_dump_state(s, 0); ++ } ++ ++ /* ++ * Synchronize with backend completion. 
++ */ ++ tpm_backend_finish_sync(s->be_driver); ++ ++ return 0; ++} ++ ++const VMStateDescription vmstate_locty = { ++ .name = "tpm-tis/locty", ++ .version_id = 0, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT32(state, TPMLocality), ++ VMSTATE_UINT32(inte, TPMLocality), ++ VMSTATE_UINT32(ints, TPMLocality), ++ VMSTATE_UINT8(access, TPMLocality), ++ VMSTATE_UINT32(sts, TPMLocality), ++ VMSTATE_UINT32(iface_id, TPMLocality), ++ VMSTATE_END_OF_LIST(), ++ } ++}; ++ +diff --git a/hw/tpm/tpm_tis_isa.c b/hw/tpm/tpm_tis_isa.c +new file mode 100644 +index 00000000..45e25c02 +--- /dev/null ++++ b/hw/tpm/tpm_tis_isa.c +@@ -0,0 +1,170 @@ ++/* ++ * tpm_tis_isa.c - QEMU's TPM TIS ISA Device ++ * ++ * Copyright (C) 2006,2010-2013 IBM Corporation ++ * ++ * Authors: ++ * Stefan Berger ++ * David Safford ++ * ++ * Xen 4 support: Andrease Niederl ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ * ++ * Implementation of the TIS interface according to specs found at ++ * http://www.trustedcomputinggroup.org. This implementation currently ++ * supports version 1.3, 21 March 2013 ++ * In the developers menu choose the PC Client section then find the TIS ++ * specification. 
++ * ++ * TPM TIS for TPM 2 implementation following TCG PC Client Platform ++ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43 ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/isa/isa.h" ++#include "hw/qdev-properties.h" ++#include "migration/vmstate.h" ++#include "tpm_util.h" ++#include "tpm_tis.h" ++ ++typedef struct TPMStateISA { ++ /*< private >*/ ++ ISADevice parent_obj; ++ ++ /*< public >*/ ++ TPMState state; /* not a QOM object */ ++} TPMStateISA; ++ ++#define TPM_TIS_ISA(obj) OBJECT_CHECK(TPMStateISA, (obj), TYPE_TPM_TIS_ISA) ++ ++static int tpm_tis_pre_save_isa(void *opaque) ++{ ++ TPMStateISA *isadev = opaque; ++ ++ return tpm_tis_pre_save(&isadev->state); ++} ++ ++static const VMStateDescription vmstate_tpm_tis_isa = { ++ .name = "tpm-tis", ++ .version_id = 0, ++ .pre_save = tpm_tis_pre_save_isa, ++ .fields = (VMStateField[]) { ++ VMSTATE_BUFFER(state.buffer, TPMStateISA), ++ VMSTATE_UINT16(state.rw_offset, TPMStateISA), ++ VMSTATE_UINT8(state.active_locty, TPMStateISA), ++ VMSTATE_UINT8(state.aborting_locty, TPMStateISA), ++ VMSTATE_UINT8(state.next_locty, TPMStateISA), ++ ++ VMSTATE_STRUCT_ARRAY(state.loc, TPMStateISA, TPM_TIS_NUM_LOCALITIES, 0, ++ vmstate_locty, TPMLocality), ++ ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static void tpm_tis_isa_request_completed(TPMIf *ti, int ret) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(ti); ++ TPMState *s = &isadev->state; ++ ++ tpm_tis_request_completed(s, ret); ++} ++ ++static enum TPMVersion tpm_tis_isa_get_tpm_version(TPMIf *ti) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(ti); ++ TPMState *s = &isadev->state; ++ ++ return tpm_tis_get_tpm_version(s); ++} ++ ++static void tpm_tis_isa_reset(DeviceState *dev) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(dev); ++ TPMState *s = &isadev->state; ++ ++ return tpm_tis_reset(s); ++} ++ ++static Property tpm_tis_isa_properties[] = { ++ DEFINE_PROP_UINT32("irq", TPMStateISA, state.irq_num, TPM_TIS_IRQ), ++ DEFINE_PROP_TPMBE("tpmdev", TPMStateISA, state.be_driver), ++ 
DEFINE_PROP_BOOL("ppi", TPMStateISA, state.ppi_enabled, true), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void tpm_tis_isa_initfn(Object *obj) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(obj); ++ TPMState *s = &isadev->state; ++ ++ memory_region_init_io(&s->mmio, obj, &tpm_tis_memory_ops, ++ s, "tpm-tis-mmio", ++ TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT); ++} ++ ++static void tpm_tis_isa_realizefn(DeviceState *dev, Error **errp) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(dev); ++ TPMState *s = &isadev->state; ++ ++ if (!tpm_find()) { ++ error_setg(errp, "at most one TPM device is permitted"); ++ return; ++ } ++ ++ if (!s->be_driver) { ++ error_setg(errp, "'tpmdev' property is required"); ++ return; ++ } ++ if (s->irq_num > 15) { ++ error_setg(errp, "IRQ %d is outside valid range of 0 to 15", ++ s->irq_num); ++ return; ++ } ++ ++ isa_init_irq(ISA_DEVICE(dev), &s->irq, s->irq_num); ++ ++ memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)), ++ TPM_TIS_ADDR_BASE, &s->mmio); ++ ++ if (s->ppi_enabled) { ++ tpm_ppi_init(&s->ppi, isa_address_space(ISA_DEVICE(dev)), ++ TPM_PPI_ADDR_BASE, OBJECT(dev)); ++ } ++} ++ ++static void tpm_tis_isa_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ TPMIfClass *tc = TPM_IF_CLASS(klass); ++ ++ dc->props = tpm_tis_isa_properties; ++ dc->vmsd = &vmstate_tpm_tis_isa; ++ tc->model = TPM_MODEL_TPM_TIS; ++ dc->realize = tpm_tis_isa_realizefn; ++ dc->reset = tpm_tis_isa_reset; ++ tc->request_completed = tpm_tis_isa_request_completed; ++ tc->get_version = tpm_tis_isa_get_tpm_version; ++} ++ ++static const TypeInfo tpm_tis_isa_info = { ++ .name = TYPE_TPM_TIS_ISA, ++ .parent = TYPE_ISA_DEVICE, ++ .instance_size = sizeof(TPMStateISA), ++ .instance_init = tpm_tis_isa_initfn, ++ .class_init = tpm_tis_isa_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_TPM_IF }, ++ { } ++ } ++}; ++ ++static void tpm_tis_isa_register(void) ++{ ++ type_register_static(&tpm_tis_isa_info); ++} ++ 
++type_init(tpm_tis_isa_register) +-- +2.23.0 + diff --git a/tpm-Use-TPMState-as-a-common-struct.patch b/tpm-Use-TPMState-as-a-common-struct.patch new file mode 100644 index 0000000000000000000000000000000000000000..61a1dd037bb2356cb7307d53f82732af404ed4e2 --- /dev/null +++ b/tpm-Use-TPMState-as-a-common-struct.patch @@ -0,0 +1,314 @@ +From c57e57c86f9d3c13b33746436bc1f09db88d4d42 Mon Sep 17 00:00:00 2001 +From: jiangfangjie +Date: Tue, 11 Aug 2020 02:52:12 +0000 +Subject: [PATCH 11/19] tpm: Use TPMState as a common struct + +As we plan to introduce a SysBus TPM TIS device, let's +make the TPMState a common struct usable by both the +ISADevice and the SysBusDevice. TPMStateISA embeds the +struct and inherits from the ISADevice. + +The prototype of functions bound to be used by both +the ISA and SysBus devices is changed to take TPMState +handle. + +A bunch of structs also are renamed to be specialized +for the ISA device. Besides those transformations, no +functional change is expected. 
+ +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Tested-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Message-id: 20200305165149.618-3-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + hw/tpm/tpm_tis.c | 147 +++++++++++++++++++++++++++++------------------ + 1 file changed, 92 insertions(+), 55 deletions(-) + +diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c +index 49d44652..735a528f 100644 +--- a/hw/tpm/tpm_tis.c ++++ b/hw/tpm/tpm_tis.c +@@ -62,7 +62,6 @@ typedef struct TPMLocality { + } TPMLocality; + + typedef struct TPMState { +- ISADevice busdev; + MemoryRegion mmio; + + unsigned char buffer[TPM_TIS_BUFFER_MAX]; +@@ -88,7 +87,15 @@ typedef struct TPMState { + TPMPPI ppi; + } TPMState; + +-#define TPM(obj) OBJECT_CHECK(TPMState, (obj), TYPE_TPM_TIS_ISA) ++typedef struct TPMStateISA { ++ /*< private >*/ ++ ISADevice parent_obj; ++ ++ /*< public >*/ ++ TPMState state; /* not a QOM object */ ++} TPMStateISA; ++ ++#define TPM_TIS_ISA(obj) OBJECT_CHECK(TPMStateISA, (obj), TYPE_TPM_TIS_ISA) + + #define DEBUG_TIS 0 + +@@ -278,9 +285,8 @@ static void tpm_tis_prep_abort(TPMState *s, uint8_t locty, uint8_t newlocty) + /* + * Callback from the TPM to indicate that the response was received. 
+ */ +-static void tpm_tis_request_completed(TPMIf *ti, int ret) ++static void tpm_tis_request_completed(TPMState *s, int ret) + { +- TPMState *s = TPM(ti); + uint8_t locty = s->cmd.locty; + uint8_t l; + +@@ -335,7 +341,7 @@ static uint32_t tpm_tis_data_read(TPMState *s, uint8_t locty) + } + + #ifdef DEBUG_TIS +-static void tpm_tis_dump_state(void *opaque, hwaddr addr) ++static void tpm_tis_dump_state(TPMState *s, hwaddr addr) + { + static const unsigned regs[] = { + TPM_TIS_REG_ACCESS, +@@ -350,7 +356,6 @@ static void tpm_tis_dump_state(void *opaque, hwaddr addr) + int idx; + uint8_t locty = tpm_tis_locality_from_addr(addr); + hwaddr base = addr & ~0xfff; +- TPMState *s = opaque; + + printf("tpm_tis: active locality : %d\n" + "tpm_tis: state of locality %d : %d\n" +@@ -360,7 +365,7 @@ static void tpm_tis_dump_state(void *opaque, hwaddr addr) + + for (idx = 0; regs[idx] != 0xfff; idx++) { + printf("tpm_tis: 0x%04x : 0x%08x\n", regs[idx], +- (int)tpm_tis_mmio_read(opaque, base + regs[idx], 4)); ++ (int)tpm_tis_mmio_read(s, base + regs[idx], 4)); + } + + printf("tpm_tis: r/w offset : %d\n" +@@ -485,7 +490,7 @@ static uint64_t tpm_tis_mmio_read(void *opaque, hwaddr addr, + break; + #ifdef DEBUG_TIS + case TPM_TIS_REG_DEBUG: +- tpm_tis_dump_state(opaque, addr); ++ tpm_tis_dump_state(s, addr); + break; + #endif + } +@@ -832,10 +837,8 @@ static const MemoryRegionOps tpm_tis_memory_ops = { + /* + * Get the TPMVersion of the backend device being used + */ +-static enum TPMVersion tpm_tis_get_tpm_version(TPMIf *ti) ++static enum TPMVersion tpm_tis_get_tpm_version(TPMState *s) + { +- TPMState *s = TPM(ti); +- + if (tpm_backend_had_startup_error(s->be_driver)) { + return TPM_VERSION_UNSPEC; + } +@@ -847,9 +850,8 @@ static enum TPMVersion tpm_tis_get_tpm_version(TPMIf *ti) + * This function is called when the machine starts, resets or due to + * S3 resume. 
+ */ +-static void tpm_tis_reset(DeviceState *dev) ++static void tpm_tis_reset(TPMState *s) + { +- TPMState *s = TPM(dev); + int c; + + s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); +@@ -893,15 +895,14 @@ static void tpm_tis_reset(DeviceState *dev) + + /* persistent state handling */ + +-static int tpm_tis_pre_save(void *opaque) ++static int tpm_tis_pre_save(TPMState *s) + { +- TPMState *s = opaque; + uint8_t locty = s->active_locty; + + trace_tpm_tis_pre_save(locty, s->rw_offset); + + if (DEBUG_TIS) { +- tpm_tis_dump_state(opaque, 0); ++ tpm_tis_dump_state(s, 0); + } + + /* +@@ -926,34 +927,78 @@ static const VMStateDescription vmstate_locty = { + } + }; + +-static const VMStateDescription vmstate_tpm_tis = { ++/* ISA */ ++ ++static int tpm_tis_pre_save_isa(void *opaque) ++{ ++ TPMStateISA *isadev = opaque; ++ ++ return tpm_tis_pre_save(&isadev->state); ++} ++ ++static const VMStateDescription vmstate_tpm_tis_isa = { + .name = "tpm-tis", + .version_id = 0, +- .pre_save = tpm_tis_pre_save, ++ .pre_save = tpm_tis_pre_save_isa, + .fields = (VMStateField[]) { +- VMSTATE_BUFFER(buffer, TPMState), +- VMSTATE_UINT16(rw_offset, TPMState), +- VMSTATE_UINT8(active_locty, TPMState), +- VMSTATE_UINT8(aborting_locty, TPMState), +- VMSTATE_UINT8(next_locty, TPMState), ++ VMSTATE_BUFFER(state.buffer, TPMStateISA), ++ VMSTATE_UINT16(state.rw_offset, TPMStateISA), ++ VMSTATE_UINT8(state.active_locty, TPMStateISA), ++ VMSTATE_UINT8(state.aborting_locty, TPMStateISA), ++ VMSTATE_UINT8(state.next_locty, TPMStateISA), + +- VMSTATE_STRUCT_ARRAY(loc, TPMState, TPM_TIS_NUM_LOCALITIES, 0, ++ VMSTATE_STRUCT_ARRAY(state.loc, TPMStateISA, TPM_TIS_NUM_LOCALITIES, 0, + vmstate_locty, TPMLocality), + + VMSTATE_END_OF_LIST() + } + }; + +-static Property tpm_tis_properties[] = { +- DEFINE_PROP_UINT32("irq", TPMState, irq_num, TPM_TIS_IRQ), +- DEFINE_PROP_TPMBE("tpmdev", TPMState, be_driver), +- DEFINE_PROP_BOOL("ppi", TPMState, ppi_enabled, true), ++static void 
tpm_tis_isa_request_completed(TPMIf *ti, int ret) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(ti); ++ TPMState *s = &isadev->state; ++ ++ tpm_tis_request_completed(s, ret); ++} ++ ++static enum TPMVersion tpm_tis_isa_get_tpm_version(TPMIf *ti) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(ti); ++ TPMState *s = &isadev->state; ++ ++ return tpm_tis_get_tpm_version(s); ++} ++ ++static void tpm_tis_isa_reset(DeviceState *dev) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(dev); ++ TPMState *s = &isadev->state; ++ ++ return tpm_tis_reset(s); ++} ++ ++static Property tpm_tis_isa_properties[] = { ++ DEFINE_PROP_UINT32("irq", TPMStateISA, state.irq_num, TPM_TIS_IRQ), ++ DEFINE_PROP_TPMBE("tpmdev", TPMStateISA, state.be_driver), ++ DEFINE_PROP_BOOL("ppi", TPMStateISA, state.ppi_enabled, true), + DEFINE_PROP_END_OF_LIST(), + }; + +-static void tpm_tis_realizefn(DeviceState *dev, Error **errp) ++static void tpm_tis_isa_initfn(Object *obj) + { +- TPMState *s = TPM(dev); ++ TPMStateISA *isadev = TPM_TIS_ISA(obj); ++ TPMState *s = &isadev->state; ++ ++ memory_region_init_io(&s->mmio, obj, &tpm_tis_memory_ops, ++ s, "tpm-tis-mmio", ++ TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT); ++} ++ ++static void tpm_tis_isa_realizefn(DeviceState *dev, Error **errp) ++{ ++ TPMStateISA *isadev = TPM_TIS_ISA(dev); ++ TPMState *s = &isadev->state; + + if (!tpm_find()) { + error_setg(errp, "at most one TPM device is permitted"); +@@ -970,55 +1015,47 @@ static void tpm_tis_realizefn(DeviceState *dev, Error **errp) + return; + } + +- isa_init_irq(&s->busdev, &s->irq, s->irq_num); ++ isa_init_irq(ISA_DEVICE(dev), &s->irq, s->irq_num); + + memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)), + TPM_TIS_ADDR_BASE, &s->mmio); + + if (s->ppi_enabled) { + tpm_ppi_init(&s->ppi, isa_address_space(ISA_DEVICE(dev)), +- TPM_PPI_ADDR_BASE, OBJECT(s)); ++ TPM_PPI_ADDR_BASE, OBJECT(dev)); + } + } + +-static void tpm_tis_initfn(Object *obj) +-{ +- TPMState *s = TPM(obj); +- +- memory_region_init_io(&s->mmio, 
OBJECT(s), &tpm_tis_memory_ops, +- s, "tpm-tis-mmio", +- TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT); +-} +- +-static void tpm_tis_class_init(ObjectClass *klass, void *data) ++static void tpm_tis_isa_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + TPMIfClass *tc = TPM_IF_CLASS(klass); + +- dc->realize = tpm_tis_realizefn; +- dc->props = tpm_tis_properties; +- dc->reset = tpm_tis_reset; +- dc->vmsd = &vmstate_tpm_tis; ++ dc->props = tpm_tis_isa_properties; ++ dc->vmsd = &vmstate_tpm_tis_isa; + tc->model = TPM_MODEL_TPM_TIS; +- tc->get_version = tpm_tis_get_tpm_version; +- tc->request_completed = tpm_tis_request_completed; ++ dc->realize = tpm_tis_isa_realizefn; ++ dc->reset = tpm_tis_isa_reset; ++ tc->request_completed = tpm_tis_isa_request_completed; ++ tc->get_version = tpm_tis_isa_get_tpm_version; ++ + } + +-static const TypeInfo tpm_tis_info = { ++static const TypeInfo tpm_tis_isa_info = { + .name = TYPE_TPM_TIS_ISA, + .parent = TYPE_ISA_DEVICE, +- .instance_size = sizeof(TPMState), +- .instance_init = tpm_tis_initfn, +- .class_init = tpm_tis_class_init, ++ .instance_size = sizeof(TPMStateISA), ++ .instance_init = tpm_tis_isa_initfn, ++ .class_init = tpm_tis_isa_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_TPM_IF }, + { } + } + }; + +-static void tpm_tis_register(void) ++static void tpm_tis_isa_register(void) + { +- type_register_static(&tpm_tis_info); ++ type_register_static(&tpm_tis_isa_info); + } + +-type_init(tpm_tis_register) ++type_init(tpm_tis_isa_register) +-- +2.23.0 + diff --git a/tpm-ppi-page-align-PPI-RAM.patch b/tpm-ppi-page-align-PPI-RAM.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7ba2c876605392e79d887b89a4a274cb51660d4 --- /dev/null +++ b/tpm-ppi-page-align-PPI-RAM.patch @@ -0,0 +1,43 @@ +From 26b54c545f253049faa633ff886132602ff47241 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 3 Jan 2020 11:39:59 +0400 +Subject: [PATCH 
02/19] tpm-ppi: page-align PPI RAM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +post-copy migration fails on destination with error such as: +2019-12-26T10:22:44.714644Z qemu-kvm: ram_block_discard_range: +Unaligned start address: 0x559d2afae9a0 + +Use qemu_memalign() to constrain the PPI RAM memory alignment. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Marc-André Lureau +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Stefan Berger +Signed-off-by: Stefan Berger +Message-id: 20200103074000.1006389-3-marcandre.lureau@redhat.com +Signed-off-by: jiangfangjie +--- + hw/tpm/tpm_ppi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c +index cd8205f2..6509ffd4 100644 +--- a/hw/tpm/tpm_ppi.c ++++ b/hw/tpm/tpm_ppi.c +@@ -44,7 +44,8 @@ void tpm_ppi_reset(TPMPPI *tpmppi) + void tpm_ppi_init(TPMPPI *tpmppi, struct MemoryRegion *m, + hwaddr addr, Object *obj) + { +- tpmppi->buf = g_malloc0(HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); ++ tpmppi->buf = qemu_memalign(qemu_real_host_page_size, ++ HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE)); + memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi", + TPM_PPI_ADDR_SIZE, tpmppi->buf); + vmstate_register_ram(&tpmppi->ram, DEVICE(obj)); +-- +2.23.0 + diff --git a/tpm-rename-TPM_TIS-into-TPM_TIS_ISA.patch b/tpm-rename-TPM_TIS-into-TPM_TIS_ISA.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea6e1d28a10cb6d29ba1c1c76245ef6749825ba2 --- /dev/null +++ b/tpm-rename-TPM_TIS-into-TPM_TIS_ISA.patch @@ -0,0 +1,101 @@ +From 7974f8ffd75171be106a1ce2705878abbb6c4477 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 5 Mar 2020 17:51:40 +0100 +Subject: [PATCH 10/19] tpm: rename TPM_TIS into TPM_TIS_ISA +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +As we plan to introduce a sysbus TPM_TIS, let's rename +TPM_TIS into TPM_TIS_ISA. 
+ +Signed-off-by: Eric Auger +Reviewed-by: Stefan Berger +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Ard Biesheuvel +Acked-by: Ard Biesheuvel +Message-id: 20200305165149.618-2-eric.auger@redhat.com +Signed-off-by: Stefan Berger +Signed-off-by: jiangfangjie +--- + hw/i386/acpi-build.c | 6 +++--- + hw/tpm/tpm_tis.c | 4 ++-- + include/sysemu/tpm.h | 6 +++--- + 3 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index c97731ec..093f7d93 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2007,7 +2007,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + } + } + +- if (TPM_IS_TIS(tpm_find())) { ++ if (TPM_IS_TIS_ISA(tpm_find())) { + aml_append(crs, aml_memory32_fixed(TPM_TIS_ADDR_BASE, + TPM_TIS_ADDR_SIZE, AML_READ_WRITE)); + } +@@ -2178,7 +2178,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + /* Scan all PCI buses. Generate tables to support hotplug. */ + build_append_pci_bus_devices(scope, bus, pm->pcihp_bridge_en); + +- if (TPM_IS_TIS(tpm)) { ++ if (TPM_IS_TIS_ISA(tpm)) { + if (misc->tpm_version == TPM_VERSION_2_0) { + dev = aml_device("TPM"); + aml_append(dev, aml_name_decl("_HID", +@@ -2285,7 +2285,7 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog) + (char *)&tpm2_ptr->log_area_start_address - table_data->data; + + tpm2_ptr->platform_class = cpu_to_le16(TPM2_ACPI_CLASS_CLIENT); +- if (TPM_IS_TIS(tpm_find())) { ++ if (TPM_IS_TIS_ISA(tpm_find())) { + tpm2_ptr->control_area_address = cpu_to_le64(0); + tpm2_ptr->start_method = cpu_to_le32(TPM2_START_METHOD_MMIO); + } else if (TPM_IS_CRB(tpm_find())) { +diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c +index 96a9ac48..49d44652 100644 +--- a/hw/tpm/tpm_tis.c ++++ b/hw/tpm/tpm_tis.c +@@ -88,7 +88,7 @@ typedef struct TPMState { + TPMPPI ppi; + } TPMState; + +-#define TPM(obj) OBJECT_CHECK(TPMState, (obj), TYPE_TPM_TIS) ++#define TPM(obj) OBJECT_CHECK(TPMState, (obj), TYPE_TPM_TIS_ISA) + + #define DEBUG_TIS 0 + 
+@@ -1005,7 +1005,7 @@ static void tpm_tis_class_init(ObjectClass *klass, void *data) + } + + static const TypeInfo tpm_tis_info = { +- .name = TYPE_TPM_TIS, ++ .name = TYPE_TPM_TIS_ISA, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(TPMState), + .instance_init = tpm_tis_initfn, +diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h +index 15979a36..1691b92c 100644 +--- a/include/sysemu/tpm.h ++++ b/include/sysemu/tpm.h +@@ -43,12 +43,12 @@ typedef struct TPMIfClass { + enum TPMVersion (*get_version)(TPMIf *obj); + } TPMIfClass; + +-#define TYPE_TPM_TIS "tpm-tis" ++#define TYPE_TPM_TIS_ISA "tpm-tis" + #define TYPE_TPM_CRB "tpm-crb" + #define TYPE_TPM_SPAPR "tpm-spapr" + +-#define TPM_IS_TIS(chr) \ +- object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS) ++#define TPM_IS_TIS_ISA(chr) \ ++ object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_ISA) + #define TPM_IS_CRB(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) + #define TPM_IS_SPAPR(chr) \ +-- +2.23.0 + diff --git a/tpm_spapr-Support-TPM-for-ppc64-using-CRQ-based-inte.patch b/tpm_spapr-Support-TPM-for-ppc64-using-CRQ-based-inte.patch new file mode 100644 index 0000000000000000000000000000000000000000..ffc0b62ed7ef655056cfd1280282b768f22ad501 --- /dev/null +++ b/tpm_spapr-Support-TPM-for-ppc64-using-CRQ-based-inte.patch @@ -0,0 +1,552 @@ +From 14402a8ca57fb722eb324d141fafb41ef06f4c2b Mon Sep 17 00:00:00 2001 +From: Stefan Berger +Date: Tue, 21 Jan 2020 10:29:32 -0500 +Subject: [PATCH 06/19] tpm_spapr: Support TPM for ppc64 using CRQ based + interface + +Implement support for TPM on ppc64 by implementing the vTPM CRQ interface +as a frontend. It can use the tpm_emulator driver backend with the external +swtpm. + +The Linux vTPM driver for ppc64 works with this emulation. + +This TPM emulator also handles the TPM 2 case. 
+ +Signed-off-by: Stefan Berger +Reviewed-by: David Gibson +Message-Id: <20200121152935.649898-4-stefanb@linux.ibm.com> +Signed-off-by: David Gibson +Signed-off-by: jiangfangjie +--- + docs/specs/tpm.txt | 20 ++- + hw/tpm/Kconfig | 6 + + hw/tpm/Makefile.objs | 1 + + hw/tpm/tpm_spapr.c | 379 +++++++++++++++++++++++++++++++++++++++++++ + hw/tpm/trace-events | 12 ++ + include/sysemu/tpm.h | 3 + + qapi/tpm.json | 6 +- + 7 files changed, 423 insertions(+), 4 deletions(-) + create mode 100644 hw/tpm/tpm_spapr.c + +diff --git a/docs/specs/tpm.txt b/docs/specs/tpm.txt +index 9c8cca04..9c3e67d8 100644 +--- a/docs/specs/tpm.txt ++++ b/docs/specs/tpm.txt +@@ -34,6 +34,12 @@ The CRB interface makes a memory mapped IO region in the area 0xfed40000 - + QEMU files related to TPM CRB interface: + - hw/tpm/tpm_crb.c + ++ ++pSeries (ppc64) machines offer a tpm-spapr device model. ++ ++QEMU files related to the SPAPR interface: ++ - hw/tpm/tpm_spapr.c ++ + = fw_cfg interface = + + The bios/firmware may read the "etc/tpm/config" fw_cfg entry for +@@ -281,7 +287,7 @@ swtpm socket --tpmstate dir=/tmp/mytpm1 \ + --log level=20 + + Command line to start QEMU with the TPM emulator device communicating with +-the swtpm: ++the swtpm (x86): + + qemu-system-x86_64 -display sdl -accel kvm \ + -m 1024 -boot d -bios bios-256k.bin -boot menu=on \ +@@ -289,6 +295,18 @@ qemu-system-x86_64 -display sdl -accel kvm \ + -tpmdev emulator,id=tpm0,chardev=chrtpm \ + -device tpm-tis,tpmdev=tpm0 test.img + ++In case a pSeries machine is emulated, use the following command line: ++ ++qemu-system-ppc64 -display sdl -machine pseries,accel=kvm \ ++ -m 1024 -bios slof.bin -boot menu=on \ ++ -nodefaults -device VGA -device pci-ohci -device usb-kbd \ ++ -chardev socket,id=chrtpm,path=/tmp/mytpm1/swtpm-sock \ ++ -tpmdev emulator,id=tpm0,chardev=chrtpm \ ++ -device tpm-spapr,tpmdev=tpm0 \ ++ -device spapr-vscsi,id=scsi0,reg=0x00002000 \ ++ -device 
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ ++ -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 ++ + + In case SeaBIOS is used as firmware, it should show the TPM menu item + after entering the menu with 'ESC'. +diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig +index 4c8ee87d..4d4ab085 100644 +--- a/hw/tpm/Kconfig ++++ b/hw/tpm/Kconfig +@@ -22,3 +22,9 @@ config TPM_EMULATOR + bool + default y + depends on TPMDEV ++ ++config TPM_SPAPR ++ bool ++ default n ++ depends on TPM && PSERIES ++ select TPMDEV +diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs +index de0b85d0..85eb99ae 100644 +--- a/hw/tpm/Makefile.objs ++++ b/hw/tpm/Makefile.objs +@@ -4,3 +4,4 @@ common-obj-$(CONFIG_TPM_TIS) += tpm_tis.o + common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o + common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o + common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o ++obj-$(CONFIG_TPM_SPAPR) += tpm_spapr.o +diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c +new file mode 100644 +index 00000000..1db9696a +--- /dev/null ++++ b/hw/tpm/tpm_spapr.c +@@ -0,0 +1,379 @@ ++/* ++ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator ++ * ++ * PAPR Virtual TPM ++ * ++ * Copyright (c) 2015, 2017, 2019 IBM Corporation. ++ * ++ * Authors: ++ * Stefan Berger ++ * ++ * This code is licensed under the GPL version 2 or later. See the ++ * COPYING file in the top-level directory. 
++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/error-report.h" ++#include "qapi/error.h" ++#include "hw/qdev-properties.h" ++#include "migration/vmstate.h" ++ ++#include "sysemu/tpm_backend.h" ++#include "tpm_int.h" ++#include "tpm_util.h" ++ ++#include "hw/ppc/spapr.h" ++#include "hw/ppc/spapr_vio.h" ++#include "trace.h" ++ ++#define DEBUG_SPAPR 0 ++ ++#define VIO_SPAPR_VTPM(obj) \ ++ OBJECT_CHECK(SpaprTpmState, (obj), TYPE_TPM_SPAPR) ++ ++typedef struct TpmCrq { ++ uint8_t valid; /* 0x80: cmd; 0xc0: init crq */ ++ /* 0x81-0x83: CRQ message response */ ++ uint8_t msg; /* see below */ ++ uint16_t len; /* len of TPM request; len of TPM response */ ++ uint32_t data; /* rtce_dma_handle when sending TPM request */ ++ uint64_t reserved; ++} TpmCrq; ++ ++#define SPAPR_VTPM_VALID_INIT_CRQ_COMMAND 0xC0 ++#define SPAPR_VTPM_VALID_COMMAND 0x80 ++#define SPAPR_VTPM_MSG_RESULT 0x80 ++ ++/* msg types for valid = SPAPR_VTPM_VALID_INIT_CRQ */ ++#define SPAPR_VTPM_INIT_CRQ_RESULT 0x1 ++#define SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT 0x2 ++ ++/* msg types for valid = SPAPR_VTPM_VALID_CMD */ ++#define SPAPR_VTPM_GET_VERSION 0x1 ++#define SPAPR_VTPM_TPM_COMMAND 0x2 ++#define SPAPR_VTPM_GET_RTCE_BUFFER_SIZE 0x3 ++#define SPAPR_VTPM_PREPARE_TO_SUSPEND 0x4 ++ ++/* response error messages */ ++#define SPAPR_VTPM_VTPM_ERROR 0xff ++ ++/* error codes */ ++#define SPAPR_VTPM_ERR_COPY_IN_FAILED 0x3 ++#define SPAPR_VTPM_ERR_COPY_OUT_FAILED 0x4 ++ ++#define TPM_SPAPR_BUFFER_MAX 4096 ++ ++typedef struct { ++ SpaprVioDevice vdev; ++ ++ TpmCrq crq; /* track single TPM command */ ++ ++ uint8_t state; ++#define SPAPR_VTPM_STATE_NONE 0 ++#define SPAPR_VTPM_STATE_EXECUTION 1 ++#define SPAPR_VTPM_STATE_COMPLETION 2 ++ ++ unsigned char *buffer; ++ ++ TPMBackendCmd cmd; ++ ++ TPMBackend *be_driver; ++ TPMVersion be_tpm_version; ++ ++ size_t be_buffer_size; ++} SpaprTpmState; ++ ++/* ++ * Send a request to the TPM. 
++ */ ++static void tpm_spapr_tpm_send(SpaprTpmState *s) ++{ ++ if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) { ++ tpm_util_show_buffer(s->buffer, s->be_buffer_size, "To TPM"); ++ } ++ ++ s->state = SPAPR_VTPM_STATE_EXECUTION; ++ s->cmd = (TPMBackendCmd) { ++ .locty = 0, ++ .in = s->buffer, ++ .in_len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size), ++ .out = s->buffer, ++ .out_len = s->be_buffer_size, ++ }; ++ ++ tpm_backend_deliver_request(s->be_driver, &s->cmd); ++} ++ ++static int tpm_spapr_process_cmd(SpaprTpmState *s, uint64_t dataptr) ++{ ++ long rc; ++ ++ /* a max. of be_buffer_size bytes can be transported */ ++ rc = spapr_vio_dma_read(&s->vdev, dataptr, ++ s->buffer, s->be_buffer_size); ++ if (rc) { ++ error_report("tpm_spapr_got_payload: DMA read failure"); ++ } ++ /* let vTPM handle any malformed request */ ++ tpm_spapr_tpm_send(s); ++ ++ return rc; ++} ++ ++static inline int spapr_tpm_send_crq(struct SpaprVioDevice *dev, TpmCrq *crq) ++{ ++ return spapr_vio_send_crq(dev, (uint8_t *)crq); ++} ++ ++static int tpm_spapr_do_crq(struct SpaprVioDevice *dev, uint8_t *crq_data) ++{ ++ SpaprTpmState *s = VIO_SPAPR_VTPM(dev); ++ TpmCrq local_crq; ++ TpmCrq *crq = &s->crq; /* requests only */ ++ int rc; ++ uint8_t valid = crq_data[0]; ++ uint8_t msg = crq_data[1]; ++ ++ trace_tpm_spapr_do_crq(valid, msg); ++ ++ switch (valid) { ++ case SPAPR_VTPM_VALID_INIT_CRQ_COMMAND: /* Init command/response */ ++ ++ /* Respond to initialization request */ ++ switch (msg) { ++ case SPAPR_VTPM_INIT_CRQ_RESULT: ++ trace_tpm_spapr_do_crq_crq_result(); ++ memset(&local_crq, 0, sizeof(local_crq)); ++ local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND; ++ local_crq.msg = SPAPR_VTPM_INIT_CRQ_RESULT; ++ spapr_tpm_send_crq(dev, &local_crq); ++ break; ++ ++ case SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT: ++ trace_tpm_spapr_do_crq_crq_complete_result(); ++ memset(&local_crq, 0, sizeof(local_crq)); ++ local_crq.valid = SPAPR_VTPM_VALID_INIT_CRQ_COMMAND; ++ 
local_crq.msg = SPAPR_VTPM_INIT_CRQ_COMPLETE_RESULT; ++ spapr_tpm_send_crq(dev, &local_crq); ++ break; ++ } ++ ++ break; ++ case SPAPR_VTPM_VALID_COMMAND: /* Payloads */ ++ switch (msg) { ++ case SPAPR_VTPM_TPM_COMMAND: ++ trace_tpm_spapr_do_crq_tpm_command(); ++ if (s->state == SPAPR_VTPM_STATE_EXECUTION) { ++ return H_BUSY; ++ } ++ memcpy(crq, crq_data, sizeof(*crq)); ++ ++ rc = tpm_spapr_process_cmd(s, be32_to_cpu(crq->data)); ++ ++ if (rc == H_SUCCESS) { ++ crq->valid = be16_to_cpu(0); ++ } else { ++ local_crq.valid = SPAPR_VTPM_MSG_RESULT; ++ local_crq.msg = SPAPR_VTPM_VTPM_ERROR; ++ local_crq.len = cpu_to_be16(0); ++ local_crq.data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_IN_FAILED); ++ spapr_tpm_send_crq(dev, &local_crq); ++ } ++ break; ++ ++ case SPAPR_VTPM_GET_RTCE_BUFFER_SIZE: ++ trace_tpm_spapr_do_crq_tpm_get_rtce_buffer_size(s->be_buffer_size); ++ local_crq.valid = SPAPR_VTPM_VALID_COMMAND; ++ local_crq.msg = SPAPR_VTPM_GET_RTCE_BUFFER_SIZE | ++ SPAPR_VTPM_MSG_RESULT; ++ local_crq.len = cpu_to_be16(s->be_buffer_size); ++ spapr_tpm_send_crq(dev, &local_crq); ++ break; ++ ++ case SPAPR_VTPM_GET_VERSION: ++ local_crq.valid = SPAPR_VTPM_VALID_COMMAND; ++ local_crq.msg = SPAPR_VTPM_GET_VERSION | SPAPR_VTPM_MSG_RESULT; ++ local_crq.len = cpu_to_be16(0); ++ switch (s->be_tpm_version) { ++ case TPM_VERSION_1_2: ++ local_crq.data = cpu_to_be32(1); ++ break; ++ case TPM_VERSION_2_0: ++ local_crq.data = cpu_to_be32(2); ++ break; ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ trace_tpm_spapr_do_crq_get_version(be32_to_cpu(local_crq.data)); ++ spapr_tpm_send_crq(dev, &local_crq); ++ break; ++ ++ case SPAPR_VTPM_PREPARE_TO_SUSPEND: ++ trace_tpm_spapr_do_crq_prepare_to_suspend(); ++ local_crq.valid = SPAPR_VTPM_VALID_COMMAND; ++ local_crq.msg = SPAPR_VTPM_PREPARE_TO_SUSPEND | ++ SPAPR_VTPM_MSG_RESULT; ++ spapr_tpm_send_crq(dev, &local_crq); ++ break; ++ ++ default: ++ trace_tpm_spapr_do_crq_unknown_msg_type(crq->msg); ++ } ++ break; ++ default: ++ 
trace_tpm_spapr_do_crq_unknown_crq(valid, msg); ++ }; ++ ++ return H_SUCCESS; ++} ++ ++static void tpm_spapr_request_completed(TPMIf *ti, int ret) ++{ ++ SpaprTpmState *s = VIO_SPAPR_VTPM(ti); ++ TpmCrq *crq = &s->crq; ++ uint32_t len; ++ int rc; ++ ++ s->state = SPAPR_VTPM_STATE_COMPLETION; ++ ++ /* a max. of be_buffer_size bytes can be transported */ ++ len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size); ++ rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data), ++ s->buffer, len); ++ ++ if (trace_event_get_state_backends(TRACE_TPM_SPAPR_SHOW_BUFFER)) { ++ tpm_util_show_buffer(s->buffer, len, "From TPM"); ++ } ++ ++ crq->valid = SPAPR_VTPM_MSG_RESULT; ++ if (rc == H_SUCCESS) { ++ crq->msg = SPAPR_VTPM_TPM_COMMAND | SPAPR_VTPM_MSG_RESULT; ++ crq->len = cpu_to_be16(len); ++ } else { ++ error_report("%s: DMA write failure", __func__); ++ crq->msg = SPAPR_VTPM_VTPM_ERROR; ++ crq->len = cpu_to_be16(0); ++ crq->data = cpu_to_be32(SPAPR_VTPM_ERR_COPY_OUT_FAILED); ++ } ++ ++ rc = spapr_tpm_send_crq(&s->vdev, crq); ++ if (rc) { ++ error_report("%s: Error sending response", __func__); ++ } ++} ++ ++static int tpm_spapr_do_startup_tpm(SpaprTpmState *s, size_t buffersize) ++{ ++ return tpm_backend_startup_tpm(s->be_driver, buffersize); ++} ++ ++static const char *tpm_spapr_get_dt_compatible(SpaprVioDevice *dev) ++{ ++ SpaprTpmState *s = VIO_SPAPR_VTPM(dev); ++ ++ switch (s->be_tpm_version) { ++ case TPM_VERSION_1_2: ++ return "IBM,vtpm"; ++ case TPM_VERSION_2_0: ++ return "IBM,vtpm20"; ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++static void tpm_spapr_reset(SpaprVioDevice *dev) ++{ ++ SpaprTpmState *s = VIO_SPAPR_VTPM(dev); ++ ++ s->state = SPAPR_VTPM_STATE_NONE; ++ ++ s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); ++ ++ s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->be_driver), ++ TPM_SPAPR_BUFFER_MAX); ++ ++ tpm_backend_reset(s->be_driver); ++ tpm_spapr_do_startup_tpm(s, s->be_buffer_size); ++} ++ ++static enum TPMVersion 
tpm_spapr_get_version(TPMIf *ti) ++{ ++ SpaprTpmState *s = VIO_SPAPR_VTPM(ti); ++ ++ if (tpm_backend_had_startup_error(s->be_driver)) { ++ return TPM_VERSION_UNSPEC; ++ } ++ ++ return tpm_backend_get_tpm_version(s->be_driver); ++} ++ ++static const VMStateDescription vmstate_spapr_vtpm = { ++ .name = "tpm-spapr", ++ .unmigratable = 1, ++}; ++ ++static Property tpm_spapr_properties[] = { ++ DEFINE_SPAPR_PROPERTIES(SpaprTpmState, vdev), ++ DEFINE_PROP_TPMBE("tpmdev", SpaprTpmState, be_driver), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ ++static void tpm_spapr_realizefn(SpaprVioDevice *dev, Error **errp) ++{ ++ SpaprTpmState *s = VIO_SPAPR_VTPM(dev); ++ ++ if (!tpm_find()) { ++ error_setg(errp, "at most one TPM device is permitted"); ++ return; ++ } ++ ++ dev->crq.SendFunc = tpm_spapr_do_crq; ++ ++ if (!s->be_driver) { ++ error_setg(errp, "'tpmdev' property is required"); ++ return; ++ } ++ s->buffer = g_malloc(TPM_SPAPR_BUFFER_MAX); ++} ++ ++static void tpm_spapr_class_init(ObjectClass *klass, void *data) ++{ ++ DeviceClass *dc = DEVICE_CLASS(klass); ++ SpaprVioDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass); ++ TPMIfClass *tc = TPM_IF_CLASS(klass); ++ ++ k->realize = tpm_spapr_realizefn; ++ k->reset = tpm_spapr_reset; ++ k->dt_name = "vtpm"; ++ k->dt_type = "IBM,vtpm"; ++ k->get_dt_compatible = tpm_spapr_get_dt_compatible; ++ k->signal_mask = 0x00000001; ++ set_bit(DEVICE_CATEGORY_MISC, dc->categories); ++ dc->props = tpm_spapr_properties; ++ k->rtce_window_size = 0x10000000; ++ dc->vmsd = &vmstate_spapr_vtpm; ++ ++ tc->model = TPM_MODEL_TPM_SPAPR; ++ tc->get_version = tpm_spapr_get_version; ++ tc->request_completed = tpm_spapr_request_completed; ++} ++ ++static const TypeInfo tpm_spapr_info = { ++ .name = TYPE_TPM_SPAPR, ++ .parent = TYPE_VIO_SPAPR_DEVICE, ++ .instance_size = sizeof(SpaprTpmState), ++ .class_init = tpm_spapr_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_TPM_IF }, ++ { } ++ } ++}; ++ ++static void tpm_spapr_register_types(void) ++{ ++ 
type_register_static(&tpm_spapr_info); ++} ++ ++type_init(tpm_spapr_register_types) +diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events +index 82c45ee5..edbe1bd7 100644 +--- a/hw/tpm/trace-events ++++ b/hw/tpm/trace-events +@@ -55,3 +55,15 @@ tpm_tis_pre_save(uint8_t locty, uint32_t rw_offset) "locty: %d, rw_offset = %u" + + # tpm_ppi.c + tpm_ppi_memset(uint8_t *ptr, size_t size) "memset: %p %zu" ++ ++# hw/tpm/tpm_spapr.c ++tpm_spapr_show_buffer(const char *direction, size_t len, const char *buf) "direction: %s len: %zu\n%s" ++tpm_spapr_do_crq(uint8_t raw1, uint8_t raw2) "1st 2 bytes in CRQ: 0x%02x 0x%02x" ++tpm_spapr_do_crq_crq_result(void) "SPAPR_VTPM_INIT_CRQ_RESULT" ++tpm_spapr_do_crq_crq_complete_result(void) "SPAPR_VTPM_INIT_CRQ_COMP_RESULT" ++tpm_spapr_do_crq_tpm_command(void) "got TPM command payload" ++tpm_spapr_do_crq_tpm_get_rtce_buffer_size(size_t buffersize) "response: buffer size is %zu" ++tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u" ++tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend" ++tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x" ++tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..." 
+diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h +index 5b541a71..15979a36 100644 +--- a/include/sysemu/tpm.h ++++ b/include/sysemu/tpm.h +@@ -45,11 +45,14 @@ typedef struct TPMIfClass { + + #define TYPE_TPM_TIS "tpm-tis" + #define TYPE_TPM_CRB "tpm-crb" ++#define TYPE_TPM_SPAPR "tpm-spapr" + + #define TPM_IS_TIS(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS) + #define TPM_IS_CRB(chr) \ + object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB) ++#define TPM_IS_SPAPR(chr) \ ++ object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR) + + /* returns NULL unless there is exactly one TPM device */ + static inline TPMIf *tpm_find(void) +diff --git a/qapi/tpm.json b/qapi/tpm.json +index b30323bb..63878aa0 100644 +--- a/qapi/tpm.json ++++ b/qapi/tpm.json +@@ -12,11 +12,11 @@ + # + # @tpm-tis: TPM TIS model + # @tpm-crb: TPM CRB model (since 2.12) ++# @tpm-spapr: TPM SPAPR model (since 5.0) + # + # Since: 1.5 + ## +-{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb' ] } +- ++{ 'enum': 'TpmModel', 'data': [ 'tpm-tis', 'tpm-crb', 'tpm-spapr' ] } + ## + # @query-tpm-models: + # +@@ -29,7 +29,7 @@ + # Example: + # + # -> { "execute": "query-tpm-models" } +-# <- { "return": [ "tpm-tis", "tpm-crb" ] } ++# <- { "return": [ "tpm-tis", "tpm-crb", "tpm-spapr" ] } + # + ## + { 'command': 'query-tpm-models', 'returns': ['TpmModel'] } +-- +2.23.0 + diff --git a/tpm_spapr-Support-suspend-and-resume.patch b/tpm_spapr-Support-suspend-and-resume.patch new file mode 100644 index 0000000000000000000000000000000000000000..55ed521a261fe5c058d9f6b95334c0884cdfd7ea --- /dev/null +++ b/tpm_spapr-Support-suspend-and-resume.patch @@ -0,0 +1,119 @@ +From 2948d9712a7058bcdca6732101874beb1a6e00a9 Mon Sep 17 00:00:00 2001 +From: Stefan Berger +Date: Tue, 21 Jan 2020 10:29:33 -0500 +Subject: [PATCH 07/19] tpm_spapr: Support suspend and resume +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Extend the tpm_spapr frontend with VM suspend and resume support. 
+ +Signed-off-by: Stefan Berger +Message-Id: <20200121152935.649898-5-stefanb@linux.ibm.com> +Reviewed-by: Marc-André Lureau +Signed-off-by: David Gibson +Signed-off-by: jiangfangjie +--- + hw/tpm/tpm_spapr.c | 52 ++++++++++++++++++++++++++++++++++++++++++++- + hw/tpm/trace-events | 2 ++ + 2 files changed, 53 insertions(+), 1 deletion(-) + +diff --git a/hw/tpm/tpm_spapr.c b/hw/tpm/tpm_spapr.c +index 1db9696a..8ba561f4 100644 +--- a/hw/tpm/tpm_spapr.c ++++ b/hw/tpm/tpm_spapr.c +@@ -76,6 +76,8 @@ typedef struct { + + unsigned char *buffer; + ++ uint32_t numbytes; /* number of bytes to deliver on resume */ ++ + TPMBackendCmd cmd; + + TPMBackend *be_driver; +@@ -240,6 +242,14 @@ static void tpm_spapr_request_completed(TPMIf *ti, int ret) + + /* a max. of be_buffer_size bytes can be transported */ + len = MIN(tpm_cmd_get_size(s->buffer), s->be_buffer_size); ++ ++ if (runstate_check(RUN_STATE_FINISH_MIGRATE)) { ++ trace_tpm_spapr_caught_response(len); ++ /* defer delivery of response until .post_load */ ++ s->numbytes = len; ++ return; ++ } ++ + rc = spapr_vio_dma_write(&s->vdev, be32_to_cpu(crq->data), + s->buffer, len); + +@@ -288,6 +298,7 @@ static void tpm_spapr_reset(SpaprVioDevice *dev) + SpaprTpmState *s = VIO_SPAPR_VTPM(dev); + + s->state = SPAPR_VTPM_STATE_NONE; ++ s->numbytes = 0; + + s->be_tpm_version = tpm_backend_get_tpm_version(s->be_driver); + +@@ -309,9 +320,48 @@ static enum TPMVersion tpm_spapr_get_version(TPMIf *ti) + return tpm_backend_get_tpm_version(s->be_driver); + } + ++/* persistent state handling */ ++ ++static int tpm_spapr_pre_save(void *opaque) ++{ ++ SpaprTpmState *s = opaque; ++ ++ tpm_backend_finish_sync(s->be_driver); ++ /* ++ * we cannot deliver the results to the VM since DMA would touch VM memory ++ */ ++ ++ return 0; ++} ++ ++static int tpm_spapr_post_load(void *opaque, int version_id) ++{ ++ SpaprTpmState *s = opaque; ++ ++ if (s->numbytes) { ++ trace_tpm_spapr_post_load(); ++ /* deliver the results to the VM via DMA */ ++ 
tpm_spapr_request_completed(TPM_IF(s), 0); ++ s->numbytes = 0; ++ } ++ ++ return 0; ++} ++ + static const VMStateDescription vmstate_spapr_vtpm = { + .name = "tpm-spapr", +- .unmigratable = 1, ++ .pre_save = tpm_spapr_pre_save, ++ .post_load = tpm_spapr_post_load, ++ .fields = (VMStateField[]) { ++ VMSTATE_SPAPR_VIO(vdev, SpaprTpmState), ++ ++ VMSTATE_UINT8(state, SpaprTpmState), ++ VMSTATE_UINT32(numbytes, SpaprTpmState), ++ VMSTATE_VBUFFER_UINT32(buffer, SpaprTpmState, 0, NULL, numbytes), ++ /* remember DMA address */ ++ VMSTATE_UINT32(crq.data, SpaprTpmState), ++ VMSTATE_END_OF_LIST(), ++ } + }; + + static Property tpm_spapr_properties[] = { +diff --git a/hw/tpm/trace-events b/hw/tpm/trace-events +index edbe1bd7..b97eea24 100644 +--- a/hw/tpm/trace-events ++++ b/hw/tpm/trace-events +@@ -67,3 +67,5 @@ tpm_spapr_do_crq_get_version(uint32_t version) "response: version %u" + tpm_spapr_do_crq_prepare_to_suspend(void) "response: preparing to suspend" + tpm_spapr_do_crq_unknown_msg_type(uint8_t type) "Unknown message type 0x%02x" + tpm_spapr_do_crq_unknown_crq(uint8_t raw1, uint8_t raw2) "unknown CRQ 0x%02x 0x%02x ..." ++tpm_spapr_post_load(void) "Delivering TPM response after resume" ++tpm_spapr_caught_response(uint32_t v) "Caught response to deliver after resume: %u bytes" +-- +2.23.0 + diff --git a/tz-ppc-add-dummy-read-write-methods.patch b/tz-ppc-add-dummy-read-write-methods.patch new file mode 100644 index 0000000000000000000000000000000000000000..ee8fa6b096bf8e359cb326c581d0a72733c8c1c4 --- /dev/null +++ b/tz-ppc-add-dummy-read-write-methods.patch @@ -0,0 +1,45 @@ +From 52d1c1a258aef2b8ace50bb202ee7338ed0060f0 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Mar 2021 17:27:07 +0800 +Subject: [PATCH] tz-ppc: add dummy read/write methods + +fix CVE-2020-15469 + +Add tz-ppc-dummy mmio read/write methods to avoid assert failure +during initialisation. 
+ +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +--- + hw/misc/tz-ppc.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/misc/tz-ppc.c b/hw/misc/tz-ppc.c +index 2a14a26f29..5b7b883866 100644 +--- a/hw/misc/tz-ppc.c ++++ b/hw/misc/tz-ppc.c +@@ -193,7 +193,20 @@ static bool tz_ppc_dummy_accepts(void *opaque, hwaddr addr, + g_assert_not_reached(); + } + ++static uint64_t tz_ppc_dummy_read(void *opaque, hwaddr addr, unsigned size) ++{ ++ g_assert_not_reached(); ++} ++ ++static void tz_ppc_dummy_write(void *opaque, hwaddr addr, ++ uint64_t data, unsigned size) ++{ ++ g_assert_not_reached(); ++} ++ + static const MemoryRegionOps tz_ppc_dummy_ops = { ++ .read = tz_ppc_dummy_read, ++ .write = tz_ppc_dummy_write, + .valid.accepts = tz_ppc_dummy_accepts, + }; + +-- +2.27.0 + diff --git a/uas-add-stream-number-sanity-checks.patch b/uas-add-stream-number-sanity-checks.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b6840cc9e392a2852f79bfdb6d4447c7b75a8a5 --- /dev/null +++ b/uas-add-stream-number-sanity-checks.patch @@ -0,0 +1,61 @@ +From 2b0a54f7fb36836f148a3a237fd0ee99a1a300a2 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Wed, 18 Aug 2021 14:05:05 +0200 +Subject: [PATCH] uas: add stream number sanity checks. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The device uses the guest-supplied stream number unchecked, which can +lead to guest-triggered out-of-bounds access to the UASDevice->data3 and +UASDevice->status3 fields. Add the missing checks.
+ +Fixes: CVE-2021-3713 +Signed-off-by: Gerd Hoffmann +Reported-by: Chen Zhe +Reported-by: Tan Jingguo +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20210818120505.1258262-2-kraxel@redhat.com> +--- + hw/usb/dev-uas.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c +index abd8070d0c..82bbc0d083 100644 +--- a/hw/usb/dev-uas.c ++++ b/hw/usb/dev-uas.c +@@ -827,6 +827,9 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) + } + break; + case UAS_PIPE_ID_STATUS: ++ if (p->stream > UAS_MAX_STREAMS) { ++ goto err_stream; ++ } + if (p->stream) { + QTAILQ_FOREACH(st, &uas->results, next) { + if (st->stream == p->stream) { +@@ -854,6 +857,9 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) + break; + case UAS_PIPE_ID_DATA_IN: + case UAS_PIPE_ID_DATA_OUT: ++ if (p->stream > UAS_MAX_STREAMS) { ++ goto err_stream; ++ } + if (p->stream) { + req = usb_uas_find_request(uas, p->stream); + } else { +@@ -889,6 +895,11 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket *p) + p->status = USB_RET_STALL; + break; + } ++ ++err_stream: ++ error_report("%s: invalid stream %d", __func__, p->stream); ++ p->status = USB_RET_STALL; ++ return; + } + + static void usb_uas_unrealize(USBDevice *dev, Error **errp) +-- +2.27.0 + diff --git a/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch b/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch new file mode 100644 index 0000000000000000000000000000000000000000..30724cedb050b52be09a0b081ffe36cd7599d268 --- /dev/null +++ b/ui-Fix-hanging-up-Cocoa-display-on-macOS-10.15-Catal.patch @@ -0,0 +1,62 @@ +From 6705b9344f8d6f134f612c2e35e87cdda5aa6284 Mon Sep 17 00:00:00 2001 +From: Hikaru Nishida +Date: Tue, 15 Oct 2019 10:07:34 +0900 +Subject: [PATCH] ui: Fix hanging up Cocoa display on macOS 10.15 (Catalina) + +macOS API documentation says that before applicationDidFinishLaunching +is called, any events will not be processed. 
However, some events are +fired before it is called in macOS Catalina. This causes deadlock of +iothread_lock in handleEvent while it will be released after the +app_started_sem is posted. +This patch avoids processing events before the app_started_sem is +posted to prevent this deadlock. + +Buglink: https://bugs.launchpad.net/qemu/+bug/1847906 +Signed-off-by: Hikaru Nishida +Message-id: 20191015010734.85229-1-hikarupsp@gmail.com +Signed-off-by: Gerd Hoffmann +(cherry picked from commit dff742ad27efa474ec04accdbf422c9acfd3e30e) +Signed-off-by: Michael Roth +--- + ui/cocoa.m | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/ui/cocoa.m b/ui/cocoa.m +index c2984028c5..3026ead621 100644 +--- a/ui/cocoa.m ++++ b/ui/cocoa.m +@@ -132,6 +132,7 @@ NSArray * supportedImageFileTypes; + + static QemuSemaphore display_init_sem; + static QemuSemaphore app_started_sem; ++static bool allow_events; + + // Utility functions to run specified code block with iothread lock held + typedef void (^CodeBlock)(void); +@@ -727,6 +728,16 @@ QemuCocoaView *cocoaView; + + - (bool) handleEvent:(NSEvent *)event + { ++ if(!allow_events) { ++ /* ++ * Just let OSX have all events that arrive before ++ * applicationDidFinishLaunching. ++ * This avoids a deadlock on the iothread lock, which cocoa_display_init() ++ * will not drop until after the app_started_sem is posted. (In theory ++ * there should not be any such events, but OSX Catalina now emits some.) 
++ */ ++ return false; ++ } + return bool_with_iothread_lock(^{ + return [self handleEventLocked:event]; + }); +@@ -1154,6 +1165,7 @@ QemuCocoaView *cocoaView; + - (void)applicationDidFinishLaunching: (NSNotification *) note + { + COCOA_DEBUG("QemuCocoaAppController: applicationDidFinishLaunching\n"); ++ allow_events = true; + /* Tell cocoa_display_init to proceed */ + qemu_sem_post(&app_started_sem); + } +-- +2.23.0 diff --git a/upd6-check-udp6_input-buffer-size.patch b/upd6-check-udp6_input-buffer-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..d9402fe435016bd9ce9af7581a11208056bed5dd --- /dev/null +++ b/upd6-check-udp6_input-buffer-size.patch @@ -0,0 +1,36 @@ +From 054a233306e781ae3cf66b386b67b68e8ac33c37 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Fri, 4 Jun 2021 16:32:55 +0400 +Subject: [PATCH 4/6] upd6: check udp6_input buffer size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fixes: CVE-2021-3593 +Fixes: https://gitlab.freedesktop.org/slirp/libslirp/-/issues/45 + +Signed-off-by: Marc-André Lureau +Signed-off-by: imxcc +--- + slirp/src/udp6.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c +index 6f9486bb..8c490e4d 100644 +--- a/slirp/src/udp6.c ++++ b/slirp/src/udp6.c +@@ -28,7 +28,10 @@ void udp6_input(struct mbuf *m) + ip = mtod(m, struct ip6 *); + m->m_len -= iphlen; + m->m_data += iphlen; +- uh = mtod(m, struct udphdr *); ++ uh = mtod_check(m, sizeof(struct udphdr)); ++ if (uh == NULL) { ++ goto bad; ++ } + m->m_len += iphlen; + m->m_data -= iphlen; + +-- +2.27.0 + diff --git a/update-linux-headers-Import-iommu.h.patch b/update-linux-headers-Import-iommu.h.patch new file mode 100644 index 0000000000000000000000000000000000000000..eea744e5063aeba90d9e16967f88b9d902de93f1 --- /dev/null +++ b/update-linux-headers-Import-iommu.h.patch @@ -0,0 +1,29 @@ +From 
78c269f4ed09a3272d99a65d9c86977a01ef99c8 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 9 May 2019 10:23:42 -0400 +Subject: [PATCH] update-linux-headers: Import iommu.h + +Update the script to import the new iommu.h uapi header. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index f76d77363b..dfdfdfddcf 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -141,7 +141,7 @@ done + + rm -rf "$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" +-for header in kvm.h vfio.h vfio_ccw.h vhost.h \ ++for header in kvm.h vfio.h vfio_ccw.h vhost.h iommu.h \ + psci.h psp-sev.h userfaultfd.h mman.h; do + cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" + done +-- +2.27.0 + diff --git a/usb-limit-combined-packets-to-1-MiB-CVE-2021-3527.patch b/usb-limit-combined-packets-to-1-MiB-CVE-2021-3527.patch new file mode 100644 index 0000000000000000000000000000000000000000..836e01ce8c2eda354b60c5b32b0e8303ab652f7c --- /dev/null +++ b/usb-limit-combined-packets-to-1-MiB-CVE-2021-3527.patch @@ -0,0 +1,39 @@ +From 93be5f3334394aa9a1794007aed79e75cf4d348b Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 21 Jun 2021 10:19:58 +0800 +Subject: [PATCH] usb: limit combined packets to 1 MiB (CVE-2021-3527) + +Fix CVE-2021-3527 + +usb-host and usb-redirect try to batch bulk transfers by combining many +small usb packets into a single, large transfer request, to reduce the +overhead and improve performance. + +This patch adds a size limit of 1 MiB for those combined packets to +restrict the host resources the guest can bind that way. 
+Signed-off-by: Gerd Hoffmann +Message-Id: <20210503132915.2335822-6-kraxel@redhat.com> + +Signed-off-by: Jiajie Li +--- + hw/usb/combined-packet.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/usb/combined-packet.c b/hw/usb/combined-packet.c +index 5d57e883dc..e56802f89a 100644 +--- a/hw/usb/combined-packet.c ++++ b/hw/usb/combined-packet.c +@@ -171,7 +171,9 @@ void usb_ep_combine_input_packets(USBEndpoint *ep) + if ((p->iov.size % ep->max_packet_size) != 0 || !p->short_not_ok || + next == NULL || + /* Work around for Linux usbfs bulk splitting + migration */ +- (totalsize == (16 * KiB - 36) && p->int_req)) { ++ (totalsize == (16 * KiB - 36) && p->int_req) || ++ /* Next package may grow combined package over 1MiB */ ++ totalsize > 1 * MiB - ep->max_packet_size) { + usb_device_handle_data(ep->dev, first); + assert(first->status == USB_RET_ASYNC); + if (first->combined) { +-- +2.27.0 + diff --git a/usbredir-Prevent-recursion-in-usbredir_write.patch b/usbredir-Prevent-recursion-in-usbredir_write.patch new file mode 100644 index 0000000000000000000000000000000000000000..29eb50f2d08b41c570fa90940ac01894c046e90e --- /dev/null +++ b/usbredir-Prevent-recursion-in-usbredir_write.patch @@ -0,0 +1,90 @@ +From 30203c01fa1bb2a7b92575683f85695a2d420b38 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 18 Dec 2019 11:30:12 +0000 +Subject: [PATCH] usbredir: Prevent recursion in usbredir_write + +I've got a case where usbredir_write manages to call back into itself +via spice; this patch causes the recursion to fail (0 bytes) the write; +this seems to avoid the deadlock I was previously seeing.
+ +I can't say I fully understand the interaction of usbredir and spice; +but there are a few similar guards in spice and usbredir +to catch other cases especially ones also related to spice_server_char_device_wakeup + +This case seems to be triggered by repeated migration+repeated +reconnection of the viewer; but my debugging suggests the migration +finished before this hits. + +The backtrace of the hang looks like: + reds_handle_ticket + reds_handle_other_links + reds_channel_do_link + red_channel_connect + spicevmc_connect + usbredir_create_parser + usbredirparser_do_write + usbredir_write + qemu_chr_fe_write + qemu_chr_write + qemu_chr_write_buffer + spice_chr_write + spice_server_char_device_wakeup + red_char_device_wakeup + red_char_device_write_to_device + vmc_write + usbredirparser_do_write + usbredir_write + qemu_chr_fe_write + qemu_chr_write + qemu_chr_write_buffer + qemu_mutex_lock_impl + +and we deadlock as we try to take qemu_chr_write_buffer's lock +twice. + +Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1752320 + +Signed-off-by: Dr.
David Alan Gilbert +Message-Id: <20191218113012.13331-1-dgilbert@redhat.com> +Signed-off-by: Gerd Hoffmann +--- + hw/usb/redirect.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index 9764a57987..3cf82589ed 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -109,6 +109,7 @@ struct USBRedirDevice { + /* Properties */ + CharBackend cs; + bool enable_streams; ++ bool in_write; + uint8_t debug; + int32_t bootindex; + char *filter_str; +@@ -286,6 +287,13 @@ static int usbredir_write(void *priv, uint8_t *data, int count) + return 0; + } + ++ /* Recursion check */ ++ if (dev->in_write) { ++ DPRINTF("usbredir_write recursion\n"); ++ return 0; ++ } ++ dev->in_write = true; ++ + r = qemu_chr_fe_write(&dev->cs, data, count); + if (r < count) { + if (!dev->watch) { +@@ -296,6 +304,7 @@ static int usbredir_write(void *priv, uint8_t *data, int count) + r = 0; + } + } ++ dev->in_write = false; + return r; + } + +-- +2.27.0 + diff --git a/usbredir-fix-buffer-overflow-on-vmload.patch b/usbredir-fix-buffer-overflow-on-vmload.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a43c35cad37bcece9822ddf61033c18dd7edfc4 --- /dev/null +++ b/usbredir-fix-buffer-overflow-on-vmload.patch @@ -0,0 +1,54 @@ +From 66fce891aecec3969d1ba979cf0a9a6df70afecd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 7 Aug 2019 12:40:48 +0400 +Subject: [PATCH] usbredir: fix buffer-overflow on vmload +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +If interface_count is NO_INTERFACE_INFO, let's not access the arrays +out-of-bounds. 
+ +==994==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x625000243930 at pc 0x5642068086a8 bp 0x7f0b6f9ffa50 sp 0x7f0b6f9ffa40 +READ of size 1 at 0x625000243930 thread T0 + #0 0x5642068086a7 in usbredir_check_bulk_receiving /home/elmarco/src/qemu/hw/usb/redirect.c:1503 + #1 0x56420681301c in usbredir_post_load /home/elmarco/src/qemu/hw/usb/redirect.c:2154 + #2 0x5642068a56c2 in vmstate_load_state /home/elmarco/src/qemu/migration/vmstate.c:168 + #3 0x56420688e2ac in vmstate_load /home/elmarco/src/qemu/migration/savevm.c:829 + #4 0x5642068980cb in qemu_loadvm_section_start_full /home/elmarco/src/qemu/migration/savevm.c:2211 + #5 0x564206899645 in qemu_loadvm_state_main /home/elmarco/src/qemu/migration/savevm.c:2395 + #6 0x5642068998cf in qemu_loadvm_state /home/elmarco/src/qemu/migration/savevm.c:2467 + #7 0x56420685f3e9 in process_incoming_migration_co /home/elmarco/src/qemu/migration/migration.c:449 + #8 0x564207106c47 in coroutine_trampoline /home/elmarco/src/qemu/util/coroutine-ucontext.c:115 + #9 0x7f0c0604e37f (/lib64/libc.so.6+0x4d37f) + +Signed-off-by: Marc-André Lureau +Reviewed-by: Liam Merwick +Reviewed-by: Li Qiang +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20190807084048.4258-1-marcandre.lureau@redhat.com +Signed-off-by: Gerd Hoffmann +Signed-off-by: Zhenyu Ye +--- + hw/usb/redirect.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index 998fc6e4..9764a579 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -1495,6 +1495,11 @@ static void usbredir_check_bulk_receiving(USBRedirDevice *dev) + for (i = EP2I(USB_DIR_IN); i < MAX_ENDPOINTS; i++) { + dev->endpoint[i].bulk_receiving_enabled = 0; + } ++ ++ if (dev->interface_info.interface_count == NO_INTERFACE_INFO) { ++ return; ++ } ++ + for (i = 0; i < dev->interface_info.interface_count; i++) { + quirks = usb_get_quirks(dev->device_info.vendor_id, + dev->device_info.product_id, +-- +2.22.0.windows.1 + diff --git 
a/usbredir-fix-free-call.patch b/usbredir-fix-free-call.patch new file mode 100644 index 0000000000000000000000000000000000000000..d4e65ef4e045e87cb61d8b713260715cedb5d79e --- /dev/null +++ b/usbredir-fix-free-call.patch @@ -0,0 +1,38 @@ +From 642ace93283c326666a9bbc2f8cf5b483fca2a6a Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Thu, 22 Jul 2021 09:27:56 +0200 +Subject: [PATCH] usbredir: fix free call +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +data might point into the middle of a larger buffer, there is a separate +free_on_destroy pointer passed into bufp_alloc() to handle that. It is +only used in the normal workflow though, not when dropping packets due +to the queue being full. Fix that. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/491 +Signed-off-by: Gerd Hoffmann +Reviewed-by: Marc-André Lureau +Message-Id: <20210722072756.647673-1-kraxel@redhat.com> +Signed-off-by: imxcc +--- + hw/usb/redirect.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c +index 3cf82589ed..71f3594f4a 100644 +--- a/hw/usb/redirect.c ++++ b/hw/usb/redirect.c +@@ -468,7 +468,7 @@ static int bufp_alloc(USBRedirDevice *dev, uint8_t *data, uint16_t len, + if (dev->endpoint[EP2I(ep)].bufpq_dropping_packets) { + if (dev->endpoint[EP2I(ep)].bufpq_size > + dev->endpoint[EP2I(ep)].bufpq_target_size) { +- free(data); ++ free(free_on_destroy); + return -1; + } + dev->endpoint[EP2I(ep)].bufpq_dropping_packets = 0; +-- +2.27.0 + diff --git a/util-add-slirp_fmt-helpers.patch b/util-add-slirp_fmt-helpers.patch new file mode 100644 index 0000000000000000000000000000000000000000..b752f1293a0b88ef0ae01410a2fc63f9ff875df8 --- /dev/null +++ b/util-add-slirp_fmt-helpers.patch @@ -0,0 +1,124 @@ +From f3475a4a22dd84be0d2d7daa11676ac861da64bc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureauls?= +Date: Tue, 14 Apr 2020 18:51:39 +0800 +Subject: [PATCH] util: add 
slirp_fmt() helpers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Various calls to snprintf() in libslirp assume that snprintf() returns +"only" the number of bytes written (excluding terminating NUL). + +https://pubs.opengroup.org/onlinepubs/9699919799/functions/snprintf.html#tag_16_159_04 + +"Upon successful completion, the snprintf() function shall return the +number of bytes that would be written to s had n been sufficiently +large excluding the terminating null byte." + +Introduce slirp_fmt() that handles several pathological cases the +way libslirp usually expect: + +- treat error as fatal (instead of silently returning -1) + +- fmt0() will always \0 end + +- return the number of bytes actually written (instead of what would +have been written, which would usually result in OOB later), including +the ending \0 for fmt0() + +- warn if truncation happened (instead of ignoring) + +Other less common cases can still be handled with strcpy/snprintf() etc. 
+Signed-off-by: Marc-André Lureau +Reviewed-by: Samuel Thibault +Message-Id: <20200127092414.169796-2-marcandre.lureau@redhat.com> +--- + slirp/src/util.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ + slirp/src/util.h | 3 +++ + 2 files changed, 66 insertions(+) + +diff --git a/slirp/src/util.c b/slirp/src/util.c +index e5960871..dcae899e 100644 +--- a/slirp/src/util.c ++++ b/slirp/src/util.c +@@ -364,3 +364,66 @@ void slirp_pstrcpy(char *buf, int buf_size, const char *str) + } + *q = '\0'; + } ++ ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv > size) { ++ g_critical("vsnprintf() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...)
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("vsnprintf() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} ++ +diff --git a/slirp/src/util.h b/slirp/src/util.h +index 3c6223ce..0558dfc2 100644 +--- a/slirp/src/util.h ++++ b/slirp/src/util.h +@@ -177,4 +177,7 @@ static inline int slirp_socket_set_fast_reuse(int fd) + + void slirp_pstrcpy(char *buf, int buf_size, const char *str); + ++int slirp_fmt(char *str, size_t size, const char *format, ...); ++int slirp_fmt0(char *str, size_t size, const char *format, ...); ++ + #endif +-- +2.23.0 diff --git a/util-cacheinfo-fix-crash-when-compiling-with-uClibc.patch b/util-cacheinfo-fix-crash-when-compiling-with-uClibc.patch new file mode 100644 index 0000000000000000000000000000000000000000..797d71e73d5115ead1f4bdf6ae3ed4aabebeb572 --- /dev/null +++ b/util-cacheinfo-fix-crash-when-compiling-with-uClibc.patch @@ -0,0 +1,45 @@ +From 00b5032eaddb7193f03f0a28b10286244d2e2a7b Mon Sep 17 00:00:00 2001 +From: Carlos Santos +Date: Thu, 17 Oct 2019 09:37:13 -0300 +Subject: [PATCH] util/cacheinfo: fix crash when compiling with uClibc + +uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE +but the corresponding sysconf calls returns -1, which is a valid result, +meaning that the limit is indeterminate. + +Handle this situation using the fallback values instead of crashing due +to an assertion failure. 
+ +Signed-off-by: Carlos Santos +Message-Id: <20191017123713.30192-1-casantos@redhat.com> +Signed-off-by: Richard Henderson +--- + util/cacheinfo.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/util/cacheinfo.c b/util/cacheinfo.c +index ea6f3e99bf..d94dc6adc8 100644 +--- a/util/cacheinfo.c ++++ b/util/cacheinfo.c +@@ -93,10 +93,16 @@ static void sys_cache_info(int *isize, int *dsize) + static void sys_cache_info(int *isize, int *dsize) + { + # ifdef _SC_LEVEL1_ICACHE_LINESIZE +- *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE); ++ int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE); ++ if (tmp_isize > 0) { ++ *isize = tmp_isize; ++ } + # endif + # ifdef _SC_LEVEL1_DCACHE_LINESIZE +- *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); ++ int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE); ++ if (tmp_dsize > 0) { ++ *dsize = tmp_dsize; ++ } + # endif + } + #endif /* sys_cache_info */ +-- +2.27.0 + diff --git a/util-hbitmap-strict-hbitmap_reset.patch b/util-hbitmap-strict-hbitmap_reset.patch new file mode 100644 index 0000000000000000000000000000000000000000..b7f568f1bc6d21ae6923a552c2536414d5d33fdd --- /dev/null +++ b/util-hbitmap-strict-hbitmap_reset.patch @@ -0,0 +1,77 @@ +From fcd7cba6acb7344aca70f5f8ec16626e817b35a5 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 6 Aug 2019 18:26:11 +0300 +Subject: [PATCH] util/hbitmap: strict hbitmap_reset + +hbitmap_reset has an unobvious property: it rounds requested region up. +It may provoke bugs, like in recently fixed write-blocking mode of +mirror: user calls reset on unaligned region, not keeping in mind that +there are possible unrelated dirty bytes, covered by rounded-up region +and information of this unrelated "dirtiness" will be lost. + +Make hbitmap_reset strict: assert that arguments are aligned, allowing +only one exception when @start + @count == hb->orig_size. 
It's needed +to comfort users of hbitmap_next_dirty_area, which cares about +hb->orig_size. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Max Reitz +Message-Id: <20190806152611.280389-1-vsementsov@virtuozzo.com> +[Maintainer edit: Max's suggestions from on-list. --js] +[Maintainer edit: Eric's suggestion for aligned macro. --js] +Signed-off-by: John Snow +(cherry picked from commit 48557b138383aaf69c2617ca9a88bfb394fc50ec) +*prereq for fed33bd175f663cc8c13f8a490a4f35a19756cfe +Signed-off-by: Michael Roth +--- + include/qemu/hbitmap.h | 5 +++++ + tests/test-hbitmap.c | 2 +- + util/hbitmap.c | 4 ++++ + 3 files changed, 10 insertions(+), 1 deletion(-) + +diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h +index 4afbe6292e..1bf944ca3d 100644 +--- a/include/qemu/hbitmap.h ++++ b/include/qemu/hbitmap.h +@@ -132,6 +132,11 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count); + * @count: Number of bits to reset. + * + * Reset a consecutive range of bits in an HBitmap. ++ * @start and @count must be aligned to bitmap granularity. The only exception ++ * is resetting the tail of the bitmap: @count may be equal to hb->orig_size - ++ * @start, in this case @count may be not aligned. The sum of @start + @count is ++ * allowed to be greater than hb->orig_size, but only if @start < hb->orig_size ++ * and @start + @count = ALIGN_UP(hb->orig_size, granularity). 
+ */ + void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count); + +diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c +index 592d8219db..2be56d1597 100644 +--- a/tests/test-hbitmap.c ++++ b/tests/test-hbitmap.c +@@ -423,7 +423,7 @@ static void test_hbitmap_granularity(TestHBitmapData *data, + hbitmap_test_check(data, 0); + hbitmap_test_set(data, 0, 3); + g_assert_cmpint(hbitmap_count(data->hb), ==, 4); +- hbitmap_test_reset(data, 0, 1); ++ hbitmap_test_reset(data, 0, 2); + g_assert_cmpint(hbitmap_count(data->hb), ==, 2); + } + +diff --git a/util/hbitmap.c b/util/hbitmap.c +index bcc0acdc6a..71c6ba2c52 100644 +--- a/util/hbitmap.c ++++ b/util/hbitmap.c +@@ -476,6 +476,10 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) + /* Compute range in the last layer. */ + uint64_t first; + uint64_t last = start + count - 1; ++ uint64_t gran = 1ULL << hb->granularity; ++ ++ assert(QEMU_IS_ALIGNED(start, gran)); ++ assert(QEMU_IS_ALIGNED(count, gran) || (start + count == hb->orig_size)); + + trace_hbitmap_reset(hb, start, count, + start >> hb->granularity, last >> hb->granularity); +-- +2.23.0 diff --git a/util-iov-improve-qemu_iovec_is_zero.patch b/util-iov-improve-qemu_iovec_is_zero.patch new file mode 100644 index 0000000000000000000000000000000000000000..0cca67b8b1ed17eb873514c177eb6e371ae21f17 --- /dev/null +++ b/util-iov-improve-qemu_iovec_is_zero.patch @@ -0,0 +1,102 @@ +From b3b76fc643912d2c86b13caff30a1151f2958702 Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 4 Jun 2019 19:15:04 +0300 +Subject: [PATCH] util/iov: improve qemu_iovec_is_zero + +We'll need to check a part of qiov soon, so implement it now. + +Optimization with align down to 4 * sizeof(long) is dropped due to: +1. It is strange: it aligns length of the buffer, but where is a + guarantee that buffer pointer is aligned itself? +2. buffer_is_zero() is a better place for optimizations and it has + them. 
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Acked-by: Stefan Hajnoczi +Message-id: 20190604161514.262241-3-vsementsov@virtuozzo.com +Message-Id: <20190604161514.262241-3-vsementsov@virtuozzo.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f76889e7b947d896db51be8a4d9c941c2f70365a) +*prereq for 292d06b9 +Signed-off-by: Michael Roth +--- + block/io.c | 2 +- + include/qemu/iov.h | 2 +- + util/iov.c | 31 +++++++++++++++++++------------ + 3 files changed, 21 insertions(+), 14 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 06305c6ea6..dccf687acc 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1715,7 +1715,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, + + if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && + !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && +- qemu_iovec_is_zero(qiov)) { ++ qemu_iovec_is_zero(qiov, 0, qiov->size)) { + flags |= BDRV_REQ_ZERO_WRITE; + if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { + flags |= BDRV_REQ_MAY_UNMAP; +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index f3787a0cf7..29957c8a72 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -212,7 +212,7 @@ void qemu_iovec_concat(QEMUIOVector *dst, + size_t qemu_iovec_concat_iov(QEMUIOVector *dst, + struct iovec *src_iov, unsigned int src_cnt, + size_t soffset, size_t sbytes); +-bool qemu_iovec_is_zero(QEMUIOVector *qiov); ++bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes); + void qemu_iovec_destroy(QEMUIOVector *qiov); + void qemu_iovec_reset(QEMUIOVector *qiov); + size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, +diff --git a/util/iov.c b/util/iov.c +index 366ff9cdd1..9ac0261853 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -451,23 +451,30 @@ void qemu_iovec_init_extended( + } + + /* +- * Check if the contents of the iovecs are all zero ++ * Check if the contents of subrange of qiov data is all zeroes. 
+ */ +-bool qemu_iovec_is_zero(QEMUIOVector *qiov) ++bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) + { +- int i; +- for (i = 0; i < qiov->niov; i++) { +- size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long)); +- uint8_t *ptr = qiov->iov[i].iov_base; +- if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) { ++ struct iovec *iov; ++ size_t current_offset; ++ ++ assert(offset + bytes <= qiov->size); ++ ++ iov = iov_skip_offset(qiov->iov, offset, &current_offset); ++ ++ while (bytes) { ++ uint8_t *base = (uint8_t *)iov->iov_base + current_offset; ++ size_t len = MIN(iov->iov_len - current_offset, bytes); ++ ++ if (!buffer_is_zero(base, len)) { + return false; + } +- for (; offs < qiov->iov[i].iov_len; offs++) { +- if (ptr[offs]) { +- return false; +- } +- } ++ ++ current_offset = 0; ++ bytes -= len; ++ iov++; + } ++ + return true; + } + +-- +2.23.0 diff --git a/util-iov-introduce-qemu_iovec_init_extended.patch b/util-iov-introduce-qemu_iovec_init_extended.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a488a63413b69816576ff8394f1e282c292e7d7 --- /dev/null +++ b/util-iov-introduce-qemu_iovec_init_extended.patch @@ -0,0 +1,177 @@ +From cff024fe856ab36db3056ba4cb1d7cfa4c39795d Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Tue, 4 Jun 2019 19:15:03 +0300 +Subject: [PATCH] util/iov: introduce qemu_iovec_init_extended + +Introduce new initialization API, to create requests with padding. Will +be used in the following patch. New API uses qemu_iovec_init_buf if +resulting io vector has only one element, to avoid extra allocations. +So, we need to update qemu_iovec_destroy to support destroying such +QIOVs.
+ +Signed-off-by: Vladimir Sementsov-Ogievskiy +Acked-by: Stefan Hajnoczi +Message-id: 20190604161514.262241-2-vsementsov@virtuozzo.com +Message-Id: <20190604161514.262241-2-vsementsov@virtuozzo.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit d953169d4840f312d3b9a54952f4a7ccfcb3b311) +*prereq for 292d06b9 +Signed-off-by: Michael Roth +--- + include/qemu/iov.h | 7 +++ + util/iov.c | 112 +++++++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 114 insertions(+), 5 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 48b45987b7..f3787a0cf7 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -199,6 +199,13 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) + + void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); + void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); ++void qemu_iovec_init_extended( ++ QEMUIOVector *qiov, ++ void *head_buf, size_t head_len, ++ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, ++ void *tail_buf, size_t tail_len); ++void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, ++ size_t offset, size_t len); + void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); + void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes); +diff --git a/util/iov.c b/util/iov.c +index 74e6ca8ed7..366ff9cdd1 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -353,6 +353,103 @@ void qemu_iovec_concat(QEMUIOVector *dst, + qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes); + } + ++/* ++ * qiov_find_iov ++ * ++ * Return pointer to iovec structure, where byte at @offset in original vector ++ * @iov exactly is. ++ * Set @remaining_offset to be offset inside that iovec to the same byte. 
++ */ ++static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, ++ size_t *remaining_offset) ++{ ++ while (offset > 0 && offset >= iov->iov_len) { ++ offset -= iov->iov_len; ++ iov++; ++ } ++ *remaining_offset = offset; ++ ++ return iov; ++} ++ ++/* ++ * qiov_slice ++ * ++ * Find subarray of iovec's, containing requested range. @head would ++ * be offset in first iov (returned by the function), @tail would be ++ * count of extra bytes in last iovec (returned iov + @niov - 1). ++ */ ++static struct iovec *qiov_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov) ++{ ++ struct iovec *iov, *end_iov; ++ ++ assert(offset + len <= qiov->size); ++ ++ iov = iov_skip_offset(qiov->iov, offset, head); ++ end_iov = iov_skip_offset(iov, *head + len, tail); ++ ++ if (*tail > 0) { ++ assert(*tail < end_iov->iov_len); ++ *tail = end_iov->iov_len - *tail; ++ end_iov++; ++ } ++ ++ *niov = end_iov - iov; ++ ++ return iov; ++} ++ ++/* ++ * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, ++ * and @tail_buf buffer into new qiov. 
++ */ ++void qemu_iovec_init_extended( ++ QEMUIOVector *qiov, ++ void *head_buf, size_t head_len, ++ QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, ++ void *tail_buf, size_t tail_len) ++{ ++ size_t mid_head, mid_tail; ++ int total_niov, mid_niov = 0; ++ struct iovec *p, *mid_iov; ++ ++ if (mid_len) { ++ mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, ++ &mid_head, &mid_tail, &mid_niov); ++ } ++ ++ total_niov = !!head_len + mid_niov + !!tail_len; ++ if (total_niov == 1) { ++ qemu_iovec_init_buf(qiov, NULL, 0); ++ p = &qiov->local_iov; ++ } else { ++ qiov->niov = qiov->nalloc = total_niov; ++ qiov->size = head_len + mid_len + tail_len; ++ p = qiov->iov = g_new(struct iovec, qiov->niov); ++ } ++ ++ if (head_len) { ++ p->iov_base = head_buf; ++ p->iov_len = head_len; ++ p++; ++ } ++ ++ if (mid_len) { ++ memcpy(p, mid_iov, mid_niov * sizeof(*p)); ++ p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; ++ p[0].iov_len -= mid_head; ++ p[mid_niov - 1].iov_len -= mid_tail; ++ p += mid_niov; ++ } ++ ++ if (tail_len) { ++ p->iov_base = tail_buf; ++ p->iov_len = tail_len; ++ } ++} ++ + /* + * Check if the contents of the iovecs are all zero + */ +@@ -374,14 +471,19 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov) + return true; + } + ++void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, ++ size_t offset, size_t len) ++{ ++ qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); ++} ++ + void qemu_iovec_destroy(QEMUIOVector *qiov) + { +- assert(qiov->nalloc != -1); ++ if (qiov->nalloc != -1) { ++ g_free(qiov->iov); ++ } + +- qemu_iovec_reset(qiov); +- g_free(qiov->iov); +- qiov->nalloc = 0; +- qiov->iov = NULL; ++ memset(qiov, 0, sizeof(*qiov)); + } + + void qemu_iovec_reset(QEMUIOVector *qiov) +-- +2.23.0 diff --git a/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch b/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch new file mode 100644 index 
0000000000000000000000000000000000000000..eb8fb5d8f84b88bed1e48516050af5546dfae1cb --- /dev/null +++ b/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch @@ -0,0 +1,258 @@ +From 3a875293ae00266e1c82a5c382066efc4acc64ce Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:15 +0530 +Subject: [PATCH] vfio: Add VM state change handler to know state of VM + +VM state change handler is called on change in VM's state. Based on +VM state, VFIO device state should be changed. +Added read/write helper functions for migration region. +Added function to set device_state. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Cornelia Huck +[aw: lx -> HWADDR_PRIx, remove redundant parens] +Signed-off-by: Alex Williamson +Signed-off-by: Shenming Lu +--- + hw/vfio/migration.c | 160 ++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 2 + + include/hw/vfio/vfio-common.h | 4 + + 3 files changed, 166 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index fd7faf423c..ca82c78536 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -10,6 +10,7 @@ + #include "qemu/osdep.h" + #include + ++#include "sysemu/sysemu.h" + #include "hw/vfio/vfio-common.h" + #include "cpu.h" + #include "migration/migration.h" +@@ -22,6 +23,157 @@ + #include "exec/ram_addr.h" + #include "pci.h" + #include "trace.h" ++#include "hw/hw.h" ++ ++static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, ++ off_t off, bool iswrite) ++{ ++ int ret; ++ ++ ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : ++ pread(vbasedev->fd, val, count, off); ++ if (ret < count) { ++ error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" ++ HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, ++ vbasedev->name, off, strerror(errno)); ++ return (ret < 0) ? 
ret : -EINVAL; ++ } ++ return 0; ++} ++ ++static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, ++ off_t off, bool iswrite) ++{ ++ int ret, done = 0; ++ __u8 *tbuf = buf; ++ ++ while (count) { ++ int bytes = 0; ++ ++ if (count >= 8 && !(off % 8)) { ++ bytes = 8; ++ } else if (count >= 4 && !(off % 4)) { ++ bytes = 4; ++ } else if (count >= 2 && !(off % 2)) { ++ bytes = 2; ++ } else { ++ bytes = 1; ++ } ++ ++ ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); ++ if (ret) { ++ return ret; ++ } ++ ++ count -= bytes; ++ done += bytes; ++ off += bytes; ++ tbuf += bytes; ++ } ++ return done; ++} ++ ++#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) ++#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) ++ ++#define VFIO_MIG_STRUCT_OFFSET(f) \ ++ offsetof(struct vfio_device_migration_info, f) ++/* ++ * Change the device_state register for device @vbasedev. Bits set in @mask ++ * are preserved, bits set in @value are set, and bits not set in either @mask ++ * or @value are cleared in device_state. If the register cannot be accessed, ++ * the resulting state would be invalid, or the device enters an error state, ++ * an error is returned. 
++ */ ++ ++static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, ++ uint32_t value) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ VFIORegion *region = &migration->region; ++ off_t dev_state_off = region->fd_offset + ++ VFIO_MIG_STRUCT_OFFSET(device_state); ++ uint32_t device_state; ++ int ret; ++ ++ ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), ++ dev_state_off); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ device_state = (device_state & mask) | value; ++ ++ if (!VFIO_DEVICE_STATE_VALID(device_state)) { ++ return -EINVAL; ++ } ++ ++ ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), ++ dev_state_off); ++ if (ret < 0) { ++ int rret; ++ ++ rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), ++ dev_state_off); ++ ++ if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { ++ hw_error("%s: Device in error state 0x%x", vbasedev->name, ++ device_state); ++ return rret ? rret : -EIO; ++ } ++ return ret; ++ } ++ ++ migration->device_state = device_state; ++ trace_vfio_migration_set_state(vbasedev->name, device_state); ++ return 0; ++} ++ ++static void vfio_vmstate_change(void *opaque, int running, RunState state) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ uint32_t value, mask; ++ int ret; ++ ++ if (vbasedev->migration->vm_running == running) { ++ return; ++ } ++ ++ if (running) { ++ /* ++ * Here device state can have one of _SAVING, _RESUMING or _STOP bit. ++ * Transition from _SAVING to _RUNNING can happen if there is migration ++ * failure, in that case clear _SAVING bit. ++ * Transition from _RESUMING to _RUNNING occurs during resuming ++ * phase, in that case clear _RESUMING bit. ++ * In both the above cases, set _RUNNING bit. ++ */ ++ mask = ~VFIO_DEVICE_STATE_MASK; ++ value = VFIO_DEVICE_STATE_RUNNING; ++ } else { ++ /* ++ * Here device state could be either _RUNNING or _SAVING|_RUNNING. 
Reset ++ * _RUNNING bit ++ */ ++ mask = ~VFIO_DEVICE_STATE_RUNNING; ++ value = 0; ++ } ++ ++ ret = vfio_migration_set_state(vbasedev, mask, value); ++ if (ret) { ++ /* ++ * Migration should be aborted in this case, but vm_state_notify() ++ * currently does not support reporting failures. ++ */ ++ error_report("%s: Failed to set device state 0x%x", vbasedev->name, ++ (migration->device_state & mask) | value); ++ qemu_file_set_error(migrate_get_current()->to_dst_file, ret); ++ } ++ vbasedev->migration->vm_running = running; ++ trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), ++ (migration->device_state & mask) | value); ++} + + static void vfio_migration_exit(VFIODevice *vbasedev) + { +@@ -38,6 +190,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, + { + int ret; + Object *obj; ++ VFIOMigration *migration; + + if (!vbasedev->ops->vfio_get_object) { + return -EINVAL; +@@ -64,6 +217,10 @@ static int vfio_migration_init(VFIODevice *vbasedev, + ret = -EINVAL; + goto err; + } ++ ++ migration = vbasedev->migration; ++ migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, ++ vbasedev); + return 0; + + err: +@@ -111,6 +268,9 @@ add_blocker: + void vfio_migration_finalize(VFIODevice *vbasedev) + { + if (vbasedev->migration) { ++ VFIOMigration *migration = vbasedev->migration; ++ ++ qemu_del_vm_change_state_handler(migration->vm_state); + vfio_migration_exit(vbasedev); + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index fd034ac536..1626862315 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) "" + + # migration.c + vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" ++vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" ++vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" +diff --git 
a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index e0482c2bac..533d6737ac 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -29,6 +29,7 @@ + #ifdef CONFIG_LINUX + #include + #endif ++#include "sysemu/sysemu.h" + + #define VFIO_MSG_PREFIX "vfio %s: " + +@@ -58,7 +59,10 @@ typedef struct VFIORegion { + } VFIORegion; + + typedef struct VFIOMigration { ++ VMChangeStateEntry *vm_state; + VFIORegion region; ++ uint32_t device_state; ++ int vm_running; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.27.0 + diff --git a/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch b/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch new file mode 100644 index 0000000000000000000000000000000000000000..b15a1c4bfbeb108bc5a098df8d68aef288b5e9c2 --- /dev/null +++ b/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch @@ -0,0 +1,83 @@ +From 4363ea5cded9c6d2838a9564b067f583a6ef077f Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:22 +0530 +Subject: [PATCH] vfio: Add function to start and stop dirty pages tracking + +Call VFIO_IOMMU_DIRTY_PAGES ioctl to start and stop dirty pages tracking +for VFIO devices. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 36 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 0d2bd9e5cd..0bdf6a1820 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -11,6 +11,7 @@ + #include "qemu/main-loop.h" + #include "qemu/cutils.h" + #include ++#include + + #include "sysemu/sysemu.h" + #include "hw/vfio/vfio-common.h" +@@ -391,10 +392,40 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) + return qemu_file_get_error(f); + } + ++static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start) ++{ ++ int ret; ++ VFIOMigration *migration = vbasedev->migration; ++ VFIOContainer *container = vbasedev->group->container; ++ struct vfio_iommu_type1_dirty_bitmap dirty = { ++ .argsz = sizeof(dirty), ++ }; ++ ++ if (start) { ++ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { ++ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; ++ } else { ++ return -EINVAL; ++ } ++ } else { ++ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); ++ if (ret) { ++ error_report("Failed to set dirty tracking flag 0x%x errno: %d", ++ dirty.flags, errno); ++ return -errno; ++ } ++ return ret; ++} ++ + static void vfio_migration_cleanup(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; + ++ vfio_set_dirty_page_tracking(vbasedev, false); ++ + if (migration->region.mmaps) { + vfio_region_unmap(&migration->region); + } +@@ -435,6 +466,11 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) + return ret; + } + ++ ret = vfio_set_dirty_page_tracking(vbasedev, true); ++ if (ret) { ++ return ret; ++ } ++ + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + ret = qemu_file_get_error(f); +-- +2.27.0 + diff --git a/vfio-Add-function-to-unmap-VFIO-region.patch b/vfio-Add-function-to-unmap-VFIO-region.patch new file mode 100644 index 
0000000000000000000000000000000000000000..2cdd76a09bd44c73b42f4294055b935a31446b7e --- /dev/null +++ b/vfio-Add-function-to-unmap-VFIO-region.patch @@ -0,0 +1,103 @@ +From 68cc2be61588d14de2313342ee87eb0bb2b990e0 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:11 +0530 +Subject: [PATCH] vfio: Add function to unmap VFIO region + +This function will be used for migration region. +Migration region is mmaped when migration starts and will be unmapped when +migration is complete. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 32 ++++++++++++++++++++++++++++---- + hw/vfio/trace-events | 1 + + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a859298fda..4c32b1bb99 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -906,6 +906,18 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + return 0; + } + ++static void vfio_subregion_unmap(VFIORegion *region, int index) ++{ ++ trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem), ++ region->mmaps[index].offset, ++ region->mmaps[index].offset + ++ region->mmaps[index].size - 1); ++ memory_region_del_subregion(region->mem, &region->mmaps[index].mem); ++ munmap(region->mmaps[index].mmap, region->mmaps[index].size); ++ object_unparent(OBJECT(&region->mmaps[index].mem)); ++ region->mmaps[index].mmap = NULL; ++} ++ + int vfio_region_mmap(VFIORegion *region) + { + int i, prot = 0; +@@ -936,10 +948,7 @@ int vfio_region_mmap(VFIORegion *region) + region->mmaps[i].mmap = NULL; + + for (i--; i >= 0; i--) { +- memory_region_del_subregion(region->mem, &region->mmaps[i].mem); +- munmap(region->mmaps[i].mmap, region->mmaps[i].size); +- object_unparent(OBJECT(&region->mmaps[i].mem)); +- region->mmaps[i].mmap = NULL; ++ vfio_subregion_unmap(region, i); + } + + return ret; +@@ -964,6 +973,21
@@ int vfio_region_mmap(VFIORegion *region) + return 0; + } + ++void vfio_region_unmap(VFIORegion *region) ++{ ++ int i; ++ ++ if (!region->mem) { ++ return; ++ } ++ ++ for (i = 0; i < region->nr_mmaps; i++) { ++ if (region->mmaps[i].mmap) { ++ vfio_subregion_unmap(region, i); ++ } ++ } ++} ++ + void vfio_region_exit(VFIORegion *region) + { + int i; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index b1ef55a33f..8cdc27946c 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -111,6 +111,7 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg + vfio_region_exit(const char *name, int index) "Device %s, region %d" + vfio_region_finalize(const char *name, int index) "Device %s, region %d" + vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d" ++vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Region %s unmap [0x%lx - 0x%lx]" + vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 9107bd41c0..93493891ba 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -171,6 +171,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name); + int vfio_region_mmap(VFIORegion *region); + void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); ++void vfio_region_unmap(VFIORegion *region); + void vfio_region_exit(VFIORegion *region); + void vfio_region_finalize(VFIORegion *region); + void vfio_reset_handler(void *opaque); +-- +2.27.0 + diff --git 
a/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch b/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch new file mode 100644 index 0000000000000000000000000000000000000000..2831e94ca260e5753f2bcd0007ab036cba387b33 --- /dev/null +++ b/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch @@ -0,0 +1,162 @@ +From 1333031bd3b488ed4904a61fd292cd5aa93f8c5b Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:25 +0530 +Subject: [PATCH] vfio: Add ioctl to get dirty pages bitmap during dma unmap + +With vIOMMU, IO virtual address range can get unmapped while in pre-copy +phase of migration. In that case, unmap ioctl should return pages pinned +in that range and QEMU should find its correcponding guest physical +addresses and report those dirty. + +Suggested-by: Alex Williamson +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +[aw: fix error_report types, fix cpu_physical_memory_set_dirty_lebitmap() cast] +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 93 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 8773b998ac..4ce1c10734 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -320,11 +320,95 @@ static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) + return true; + } + ++static bool vfio_devices_all_running_and_saving(VFIOContainer *container) ++{ ++ VFIOGroup *group; ++ VFIODevice *vbasedev; ++ MigrationState *ms = migrate_get_current(); ++ ++ if (!migration_is_setup_or_active(ms->state)) { ++ return false; ++ } ++ ++ QLIST_FOREACH(group, &container->group_list, container_next) { ++ QLIST_FOREACH(vbasedev, &group->device_list, next) { ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (!migration) { ++ return false; ++ } ++ ++ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && ++ (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ continue; ++ } else { 
++ return false; ++ } ++ } ++ } ++ return true; ++} ++ ++static int vfio_dma_unmap_bitmap(VFIOContainer *container, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) ++{ ++ struct vfio_iommu_type1_dma_unmap *unmap; ++ struct vfio_bitmap *bitmap; ++ uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS; ++ int ret; ++ ++ unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); ++ ++ unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); ++ unmap->iova = iova; ++ unmap->size = size; ++ unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP; ++ bitmap = (struct vfio_bitmap *)&unmap->data; ++ ++ /* ++ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of ++ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to ++ * TARGET_PAGE_SIZE. ++ */ ++ ++ bitmap->pgsize = TARGET_PAGE_SIZE; ++ bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / ++ BITS_PER_BYTE; ++ ++ if (bitmap->size > container->max_dirty_bitmap_size) { ++ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, ++ (uint64_t)bitmap->size); ++ ret = -E2BIG; ++ goto unmap_exit; ++ } ++ ++ bitmap->data = g_try_malloc0(bitmap->size); ++ if (!bitmap->data) { ++ ret = -ENOMEM; ++ goto unmap_exit; ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); ++ if (!ret) { ++ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data, ++ iotlb->translated_addr, pages); ++ } else { ++ error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); ++ } ++ ++ g_free(bitmap->data); ++unmap_exit: ++ g_free(unmap); ++ return ret; ++} ++ + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ + static int vfio_dma_unmap(VFIOContainer *container, +- hwaddr iova, ram_addr_t size) ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) + { + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), +@@ -333,6 +417,11 @@ static int vfio_dma_unmap(VFIOContainer *container, + .size = size, + }; + ++ if (iotlb && 
container->dirty_pages_supported && ++ vfio_devices_all_running_and_saving(container)) { ++ return vfio_dma_unmap_bitmap(container, iova, size, iotlb); ++ } ++ + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + /* + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c +@@ -380,7 +469,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, + * the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || +- (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && ++ (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } +@@ -530,7 +619,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + iotlb->addr_mask + 1, vaddr, ret); + } + } else { +- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1); ++ ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%m)", +@@ -816,7 +905,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + } + + if (try_unmap) { +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize)); ++ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%m)", +-- +2.27.0 + diff --git a/vfio-Add-load-state-functions-to-SaveVMHandlers.patch b/vfio-Add-load-state-functions-to-SaveVMHandlers.patch new file mode 100644 index 0000000000000000000000000000000000000000..d70caeeef043b6c6cb53f09c9adb67b40b344862 --- /dev/null +++ b/vfio-Add-load-state-functions-to-SaveVMHandlers.patch @@ -0,0 +1,266 @@ +From ddef5d5257987f2f415ce41fdc482feda61aa796 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:19 +0530 +Subject: [PATCH] vfio: Add load state functions to SaveVMHandlers + +Sequence during _RESUMING device state: 
+While data for this device is available, repeat below steps: +a. read data_offset from where user application should write data. +b. write data of data_size to migration region from data_offset. +c. write data_size which indicates vendor driver that data is written in + staging buffer. + +For user, data is opaque. User should write data in the same order as +received. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Yan Zhao +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 195 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 4 + + 2 files changed, 199 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index f78a77e1e3..954c064435 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -257,6 +257,77 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) + return ret; + } + ++static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, ++ uint64_t data_size) ++{ ++ VFIORegion *region = &vbasedev->migration->region; ++ uint64_t data_offset = 0, size, report_size; ++ int ret; ++ ++ do { ++ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if (data_offset + data_size > region->size) { ++ /* ++ * If data_size is greater than the data section of migration region ++ * then iterate the write buffer operation. This case can occur if ++ * size of migration region at destination is smaller than size of ++ * migration region at source. 
++ */ ++ report_size = size = region->size - data_offset; ++ data_size -= size; ++ } else { ++ report_size = size = data_size; ++ data_size = 0; ++ } ++ ++ trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); ++ ++ while (size) { ++ void *buf; ++ uint64_t sec_size; ++ bool buf_alloc = false; ++ ++ buf = get_data_section_size(region, data_offset, size, &sec_size); ++ ++ if (!buf) { ++ buf = g_try_malloc(sec_size); ++ if (!buf) { ++ error_report("%s: Error allocating buffer ", __func__); ++ return -ENOMEM; ++ } ++ buf_alloc = true; ++ } ++ ++ qemu_get_buffer(f, buf, sec_size); ++ ++ if (buf_alloc) { ++ ret = vfio_mig_write(vbasedev, buf, sec_size, ++ region->fd_offset + data_offset); ++ g_free(buf); ++ ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ size -= sec_size; ++ data_offset += sec_size; ++ } ++ ++ ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); ++ if (ret < 0) { ++ return ret; ++ } ++ } while (data_size); ++ ++ return 0; ++} ++ + static int vfio_update_pending(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; +@@ -293,6 +364,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque) + return qemu_file_get_error(f); + } + ++static int vfio_load_device_config_state(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ uint64_t data; ++ ++ if (vbasedev->ops && vbasedev->ops->vfio_load_config) { ++ int ret; ++ ++ ret = vbasedev->ops->vfio_load_config(vbasedev, f); ++ if (ret) { ++ error_report("%s: Failed to load device config space", ++ vbasedev->name); ++ return ret; ++ } ++ } ++ ++ data = qemu_get_be64(f); ++ if (data != VFIO_MIG_FLAG_END_OF_STATE) { ++ error_report("%s: Failed loading device config space, " ++ "end flag incorrect 0x%"PRIx64, vbasedev->name, data); ++ return -EINVAL; ++ } ++ ++ trace_vfio_load_device_config_state(vbasedev->name); ++ return qemu_file_get_error(f); ++} ++ + static void 
vfio_migration_cleanup(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; +@@ -483,12 +581,109 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + return ret; + } + ++static int vfio_load_setup(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ int ret = 0; ++ ++ if (migration->region.mmaps) { ++ ret = vfio_region_mmap(&migration->region); ++ if (ret) { ++ error_report("%s: Failed to mmap VFIO migration region %d: %s", ++ vbasedev->name, migration->region.nr, ++ strerror(-ret)); ++ error_report("%s: Falling back to slow path", vbasedev->name); ++ } ++ } ++ ++ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, ++ VFIO_DEVICE_STATE_RESUMING); ++ if (ret) { ++ error_report("%s: Failed to set state RESUMING", vbasedev->name); ++ if (migration->region.mmaps) { ++ vfio_region_unmap(&migration->region); ++ } ++ } ++ return ret; ++} ++ ++static int vfio_load_cleanup(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ vfio_migration_cleanup(vbasedev); ++ trace_vfio_load_cleanup(vbasedev->name); ++ return 0; ++} ++ ++static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) ++{ ++ VFIODevice *vbasedev = opaque; ++ int ret = 0; ++ uint64_t data; ++ ++ data = qemu_get_be64(f); ++ while (data != VFIO_MIG_FLAG_END_OF_STATE) { ++ ++ trace_vfio_load_state(vbasedev->name, data); ++ ++ switch (data) { ++ case VFIO_MIG_FLAG_DEV_CONFIG_STATE: ++ { ++ ret = vfio_load_device_config_state(f, opaque); ++ if (ret) { ++ return ret; ++ } ++ break; ++ } ++ case VFIO_MIG_FLAG_DEV_SETUP_STATE: ++ { ++ data = qemu_get_be64(f); ++ if (data == VFIO_MIG_FLAG_END_OF_STATE) { ++ return ret; ++ } else { ++ error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, ++ vbasedev->name, data); ++ return -EINVAL; ++ } ++ break; ++ } ++ case VFIO_MIG_FLAG_DEV_DATA_STATE: ++ { ++ uint64_t data_size = qemu_get_be64(f); ++ ++ if (data_size) { ++ ret = vfio_load_buffer(f, 
vbasedev, data_size); ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ break; ++ } ++ default: ++ error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); ++ return -EINVAL; ++ } ++ ++ data = qemu_get_be64(f); ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ return ret; ++ } ++ } ++ return ret; ++} ++ + static SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, + .save_live_pending = vfio_save_pending, + .save_live_iterate = vfio_save_iterate, + .save_live_complete_precopy = vfio_save_complete_precopy, ++ .load_setup = vfio_load_setup, ++ .load_cleanup = vfio_load_cleanup, ++ .load_state = vfio_load_state, + }; + + /* ---------------------------------------------------------------------- */ +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 9a1c5e17d9..4f08f5a633 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -157,3 +157,7 @@ vfio_save_device_config_state(const char *name) " (%s)" + vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 + vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" + vfio_save_complete_precopy(const char *name) " (%s)" ++vfio_load_device_config_state(const char *name) " (%s)" ++vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 ++vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 ++vfio_load_cleanup(const char *name) " (%s)" +-- +2.27.0 + diff --git a/vfio-Add-migration-region-initialization-and-finaliz.patch b/vfio-Add-migration-region-initialization-and-finaliz.patch new file mode 100644 index 0000000000000000000000000000000000000000..c804f1f6c353143ca74fe05889d5e163ab9dc8c3 --- /dev/null +++ b/vfio-Add-migration-region-initialization-and-finaliz.patch @@ -0,0 +1,209 @@ +From b7128f8aa03482634c07691cef69e7ed2d35200e 
Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:14 +0530 +Subject: [PATCH] vfio: Add migration region initialization and finalize + function + +Whether the VFIO device supports migration or not is decided based of +migration region query. If migration region query is successful and migration +region initialization is successful then migration is supported else +migration is blocked. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Acked-by: Dr. David Alan Gilbert +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +Signed-off-by: Shenming Lu +--- + hw/vfio/Makefile.objs | 2 +- + hw/vfio/migration.c | 122 ++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 3 + + include/hw/vfio/vfio-common.h | 9 +++ + 4 files changed, 135 insertions(+), 1 deletion(-) + create mode 100644 hw/vfio/migration.c + +diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs +index abad8b818c..36033d1437 100644 +--- a/hw/vfio/Makefile.objs ++++ b/hw/vfio/Makefile.objs +@@ -1,4 +1,4 @@ +-obj-y += common.o spapr.o ++obj-y += common.o spapr.o migration.o + obj-$(CONFIG_VFIO_PCI) += pci.o pci-quirks.o display.o + obj-$(CONFIG_VFIO_CCW) += ccw.o + obj-$(CONFIG_VFIO_PLATFORM) += platform.o +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +new file mode 100644 +index 0000000000..fd7faf423c +--- /dev/null ++++ b/hw/vfio/migration.c +@@ -0,0 +1,122 @@ ++/* ++ * Migration support for VFIO devices ++ * ++ * Copyright NVIDIA, Inc. 2020 ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. 
++ */ ++ ++#include "qemu/osdep.h" ++#include ++ ++#include "hw/vfio/vfio-common.h" ++#include "cpu.h" ++#include "migration/migration.h" ++#include "migration/qemu-file.h" ++#include "migration/register.h" ++#include "migration/blocker.h" ++#include "migration/misc.h" ++#include "qapi/error.h" ++#include "exec/ramlist.h" ++#include "exec/ram_addr.h" ++#include "pci.h" ++#include "trace.h" ++ ++static void vfio_migration_exit(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ vfio_region_exit(&migration->region); ++ vfio_region_finalize(&migration->region); ++ g_free(vbasedev->migration); ++ vbasedev->migration = NULL; ++} ++ ++static int vfio_migration_init(VFIODevice *vbasedev, ++ struct vfio_region_info *info) ++{ ++ int ret; ++ Object *obj; ++ ++ if (!vbasedev->ops->vfio_get_object) { ++ return -EINVAL; ++ } ++ ++ obj = vbasedev->ops->vfio_get_object(vbasedev); ++ if (!obj) { ++ return -EINVAL; ++ } ++ ++ vbasedev->migration = g_new0(VFIOMigration, 1); ++ ++ ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, ++ info->index, "migration"); ++ if (ret) { ++ error_report("%s: Failed to setup VFIO migration region %d: %s", ++ vbasedev->name, info->index, strerror(-ret)); ++ goto err; ++ } ++ ++ if (!vbasedev->migration->region.size) { ++ error_report("%s: Invalid zero-sized VFIO migration region %d", ++ vbasedev->name, info->index); ++ ret = -EINVAL; ++ goto err; ++ } ++ return 0; ++ ++err: ++ vfio_migration_exit(vbasedev); ++ return ret; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) ++{ ++ struct vfio_region_info *info = NULL; ++ Error *local_err = NULL; ++ int ret; ++ ++ ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, ++ VFIO_REGION_SUBTYPE_MIGRATION, &info); ++ if (ret) { ++ goto add_blocker; ++ } ++ ++ ret = vfio_migration_init(vbasedev, info); ++ if (ret) { ++ goto add_blocker; ++ } ++ 
++ g_free(info); ++ trace_vfio_migration_probe(vbasedev->name, info->index); ++ return 0; ++ ++add_blocker: ++ error_setg(&vbasedev->migration_blocker, ++ "VFIO device doesn't support migration"); ++ g_free(info); ++ ++ ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ error_free(vbasedev->migration_blocker); ++ vbasedev->migration_blocker = NULL; ++ } ++ return ret; ++} ++ ++void vfio_migration_finalize(VFIODevice *vbasedev) ++{ ++ if (vbasedev->migration) { ++ vfio_migration_exit(vbasedev); ++ } ++ ++ if (vbasedev->migration_blocker) { ++ migrate_del_blocker(vbasedev->migration_blocker); ++ error_free(vbasedev->migration_blocker); ++ vbasedev->migration_blocker = NULL; ++ } ++} +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 8cdc27946c..fd034ac536 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -143,3 +143,6 @@ vfio_display_edid_link_up(void) "" + vfio_display_edid_link_down(void) "" + vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" + vfio_display_edid_write_error(void) "" ++ ++# migration.c ++vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 6ea4898c4d..e0482c2bac 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -57,6 +57,10 @@ typedef struct VFIORegion { + uint8_t nr; /* cache the region number for debug */ + } VFIORegion; + ++typedef struct VFIOMigration { ++ VFIORegion region; ++} VFIOMigration; ++ + typedef struct VFIOAddressSpace { + AddressSpace *as; + QLIST_HEAD(, VFIOContainer) containers; +@@ -113,6 +117,8 @@ typedef struct VFIODevice { + unsigned int num_irqs; + unsigned int num_regions; + unsigned int flags; ++ VFIOMigration *migration; ++ Error *migration_blocker; + } VFIODevice; + + struct VFIODeviceOps { +@@ -204,4 +210,7 @@ int vfio_spapr_create_window(VFIOContainer *container, + 
int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space); + ++int vfio_migration_probe(VFIODevice *vbasedev, Error **errp); ++void vfio_migration_finalize(VFIODevice *vbasedev); ++ + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.27.0 + diff --git a/vfio-Add-migration-state-change-notifier.patch b/vfio-Add-migration-state-change-notifier.patch new file mode 100644 index 0000000000000000000000000000000000000000..5fe73a4cb18cd401d8d63ec8440cc361bbae60d9 --- /dev/null +++ b/vfio-Add-migration-state-change-notifier.patch @@ -0,0 +1,104 @@ +From b61729a5e0ab89d29f041202b50d042405076e62 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:16 +0530 +Subject: [PATCH] vfio: Add migration state change notifier + +Added migration state change notifier to get notification on migration state +change. These states are translated to VFIO device state and conveyed to +vendor driver. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Dr. 
David Alan Gilbert +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 28 ++++++++++++++++++++++++++++ + hw/vfio/trace-events | 1 + + include/hw/vfio/vfio-common.h | 2 ++ + 3 files changed, 31 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index ca82c78536..0c6c9b655f 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -175,6 +175,30 @@ static void vfio_vmstate_change(void *opaque, int running, RunState state) + (migration->device_state & mask) | value); + } + ++static void vfio_migration_state_notifier(Notifier *notifier, void *data) ++{ ++ MigrationState *s = data; ++ VFIOMigration *migration = container_of(notifier, VFIOMigration, ++ migration_state); ++ VFIODevice *vbasedev = migration->vbasedev; ++ int ret; ++ ++ trace_vfio_migration_state_notifier(vbasedev->name, ++ MigrationStatus_str(s->state)); ++ ++ switch (s->state) { ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_CANCELLED: ++ case MIGRATION_STATUS_FAILED: ++ ret = vfio_migration_set_state(vbasedev, ++ ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), ++ VFIO_DEVICE_STATE_RUNNING); ++ if (ret) { ++ error_report("%s: Failed to set state RUNNING", vbasedev->name); ++ } ++ } ++} ++ + static void vfio_migration_exit(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; +@@ -219,8 +243,11 @@ static int vfio_migration_init(VFIODevice *vbasedev, + } + + migration = vbasedev->migration; ++ migration->vbasedev = vbasedev; + migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, + vbasedev); ++ migration->migration_state.notify = vfio_migration_state_notifier; ++ add_migration_state_change_notifier(&migration->migration_state); + return 0; + + err: +@@ -270,6 +297,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) + if (vbasedev->migration) { + VFIOMigration *migration = vbasedev->migration; + ++ remove_migration_state_change_notifier(&migration->migration_state); + 
qemu_del_vm_change_state_handler(migration->vm_state); + vfio_migration_exit(vbasedev); + } +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 1626862315..bd3d47b005 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -148,3 +148,4 @@ vfio_display_edid_write_error(void) "" + vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" + vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" + vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" ++vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 533d6737ac..efff0590ae 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -59,10 +59,12 @@ typedef struct VFIORegion { + } VFIORegion; + + typedef struct VFIOMigration { ++ struct VFIODevice *vbasedev; + VMChangeStateEntry *vm_state; + VFIORegion region; + uint32_t device_state; + int vm_running; ++ Notifier migration_state; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.27.0 + diff --git a/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch b/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch new file mode 100644 index 0000000000000000000000000000000000000000..232efcf5dbcc072f358e3fec5a5f1186a1a97b55 --- /dev/null +++ b/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch @@ -0,0 +1,106 @@ +From 92f104ca6e35acae079ca3bb432f24452058d483 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:13 +0530 +Subject: [PATCH] vfio: Add save and load functions for VFIO PCI devices + +Added functions to save and restore PCI device specific data, +specifically config space of PCI device. 
+ +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Signed-off-by: Alex Williamson +--- + hw/vfio/pci.c | 51 +++++++++++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 2 ++ + 2 files changed, 53 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index de0d286fc9..b9fae3ad28 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -35,6 +35,7 @@ + #include "pci.h" + #include "trace.h" + #include "qapi/error.h" ++#include "migration/qemu-file.h" + + #define TYPE_VFIO_PCI "vfio-pci" + #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) +@@ -2395,11 +2396,61 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) + return OBJECT(vdev); + } + ++static bool vfio_msix_present(void *opaque, int version_id) ++{ ++ PCIDevice *pdev = opaque; ++ ++ return msix_present(pdev); ++} ++ ++const VMStateDescription vmstate_vfio_pci_config = { ++ .name = "VFIOPCIDevice", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), ++ VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ ++ vmstate_save_state(f, &vmstate_vfio_pci_config, vdev, NULL); ++} ++ ++static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ PCIDevice *pdev = &vdev->pdev; ++ int ret; ++ ++ ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1); ++ if (ret) { ++ return ret; ++ } ++ ++ vfio_pci_write_config(pdev, PCI_COMMAND, ++ pci_get_word(pdev->config + PCI_COMMAND), 2); ++ ++ if (msi_enabled(pdev)) { ++ vfio_msi_enable(vdev); ++ } else if (msix_enabled(pdev)) { ++ vfio_msix_enable(vdev); ++ } ++ ++ return ret; ++} ++ + static VFIODeviceOps vfio_pci_ops = { + .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, + 
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi, + .vfio_eoi = vfio_intx_eoi, + .vfio_get_object = vfio_pci_get_object, ++ .vfio_save_config = vfio_pci_save_config, ++ .vfio_load_config = vfio_pci_load_config, + }; + + int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 771b6d59a3..6ea4898c4d 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -120,6 +120,8 @@ struct VFIODeviceOps { + int (*vfio_hot_reset_multi)(VFIODevice *vdev); + void (*vfio_eoi)(VFIODevice *vdev); + Object *(*vfio_get_object)(VFIODevice *vdev); ++ void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); ++ int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); + }; + + typedef struct VFIOGroup { +-- +2.27.0 + diff --git a/vfio-Add-save-state-functions-to-SaveVMHandlers.patch b/vfio-Add-save-state-functions-to-SaveVMHandlers.patch new file mode 100644 index 0000000000000000000000000000000000000000..14047fd8a474c07c71fa4ba622e1fb33d043b02d --- /dev/null +++ b/vfio-Add-save-state-functions-to-SaveVMHandlers.patch @@ -0,0 +1,380 @@ +From 94f106f95e887d1d706e8f771fd6ad287ddac2dc Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:18 +0530 +Subject: [PATCH] vfio: Add save state functions to SaveVMHandlers + +Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy +functions. These functions handles pre-copy and stop-and-copy phase. + +In _SAVING|_RUNNING device state or pre-copy phase: +- read pending_bytes. If pending_bytes > 0, go through below steps. +- read data_offset - indicates kernel driver to write data to staging + buffer. +- read data_size - amount of data in bytes written by vendor driver in + migration region. +- read data_size bytes of data from data_offset in the migration region. 
+- Write data packet to file stream as below: +{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data, +VFIO_MIG_FLAG_END_OF_STATE } + +In _SAVING device state or stop-and-copy phase +a. read config space of device and save to migration file stream. This + doesn't need to be from vendor driver. Any other special config state + from driver can be saved as data in following iteration. +b. read pending_bytes. If pending_bytes > 0, go through below steps. +c. read data_offset - indicates kernel driver to write data to staging + buffer. +d. read data_size - amount of data in bytes written by vendor driver in + migration region. +e. read data_size bytes of data from data_offset in the migration region. +f. Write data packet as below: + {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data} +g. iterate through steps b to f while (pending_bytes > 0) +h. Write {VFIO_MIG_FLAG_END_OF_STATE} + +When data region is mapped, its user's responsibility to read data from +data_offset of data_size before moving to next steps. + +Added fix suggested by Artem Polyakov to reset pending_bytes in +vfio_save_iterate(). +Added fix suggested by Zhi Wang to add 0 as data size in migration stream and +add END_OF_STATE delimiter to indicate phase complete. 
+ +Suggested-by: Artem Polyakov +Suggested-by: Zhi Wang +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Yan Zhao +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 276 ++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 6 + + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 283 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 405228fc5a..f78a77e1e3 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -148,6 +148,151 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, + return 0; + } + ++static void *get_data_section_size(VFIORegion *region, uint64_t data_offset, ++ uint64_t data_size, uint64_t *size) ++{ ++ void *ptr = NULL; ++ uint64_t limit = 0; ++ int i; ++ ++ if (!region->mmaps) { ++ if (size) { ++ *size = MIN(data_size, region->size - data_offset); ++ } ++ return ptr; ++ } ++ ++ for (i = 0; i < region->nr_mmaps; i++) { ++ VFIOMmap *map = region->mmaps + i; ++ ++ if ((data_offset >= map->offset) && ++ (data_offset < map->offset + map->size)) { ++ ++ /* check if data_offset is within sparse mmap areas */ ++ ptr = map->mmap + data_offset - map->offset; ++ if (size) { ++ *size = MIN(data_size, map->offset + map->size - data_offset); ++ } ++ break; ++ } else if ((data_offset < map->offset) && ++ (!limit || limit > map->offset)) { ++ /* ++ * data_offset is not within sparse mmap areas, find size of ++ * non-mapped area. Check through all list since region->mmaps list ++ * is not sorted. ++ */ ++ limit = map->offset; ++ } ++ } ++ ++ if (!ptr && size) { ++ *size = limit ? 
MIN(data_size, limit - data_offset) : data_size; ++ } ++ return ptr; ++} ++ ++static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ VFIORegion *region = &migration->region; ++ uint64_t data_offset = 0, data_size = 0, sz; ++ int ret; ++ ++ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ trace_vfio_save_buffer(vbasedev->name, data_offset, data_size, ++ migration->pending_bytes); ++ ++ qemu_put_be64(f, data_size); ++ sz = data_size; ++ ++ while (sz) { ++ void *buf; ++ uint64_t sec_size; ++ bool buf_allocated = false; ++ ++ buf = get_data_section_size(region, data_offset, sz, &sec_size); ++ ++ if (!buf) { ++ buf = g_try_malloc(sec_size); ++ if (!buf) { ++ error_report("%s: Error allocating buffer ", __func__); ++ return -ENOMEM; ++ } ++ buf_allocated = true; ++ ++ ret = vfio_mig_read(vbasedev, buf, sec_size, ++ region->fd_offset + data_offset); ++ if (ret < 0) { ++ g_free(buf); ++ return ret; ++ } ++ } ++ ++ qemu_put_buffer(f, buf, sec_size); ++ ++ if (buf_allocated) { ++ g_free(buf); ++ } ++ sz -= sec_size; ++ data_offset += sec_size; ++ } ++ ++ ret = qemu_file_get_error(f); ++ ++ if (!ret && size) { ++ *size = data_size; ++ } ++ ++ return ret; ++} ++ ++static int vfio_update_pending(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ VFIORegion *region = &migration->region; ++ uint64_t pending_bytes = 0; ++ int ret; ++ ++ ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); ++ if (ret < 0) { ++ migration->pending_bytes = 0; ++ return ret; ++ } ++ ++ migration->pending_bytes = pending_bytes; ++ 
trace_vfio_update_pending(vbasedev->name, pending_bytes); ++ return 0; ++} ++ ++static int vfio_save_device_config_state(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE); ++ ++ if (vbasedev->ops && vbasedev->ops->vfio_save_config) { ++ vbasedev->ops->vfio_save_config(vbasedev, f); ++ } ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ trace_vfio_save_device_config_state(vbasedev->name); ++ ++ return qemu_file_get_error(f); ++} ++ + static void vfio_migration_cleanup(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; +@@ -210,9 +355,140 @@ static void vfio_save_cleanup(void *opaque) + trace_vfio_save_cleanup(vbasedev->name); + } + ++static void vfio_save_pending(QEMUFile *f, void *opaque, ++ uint64_t threshold_size, ++ uint64_t *res_precopy_only, ++ uint64_t *res_compatible, ++ uint64_t *res_postcopy_only) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ int ret; ++ ++ ret = vfio_update_pending(vbasedev); ++ if (ret) { ++ return; ++ } ++ ++ *res_precopy_only += migration->pending_bytes; ++ ++ trace_vfio_save_pending(vbasedev->name, *res_precopy_only, ++ *res_postcopy_only, *res_compatible); ++} ++ ++static int vfio_save_iterate(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ uint64_t data_size; ++ int ret; ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); ++ ++ if (migration->pending_bytes == 0) { ++ ret = vfio_update_pending(vbasedev); ++ if (ret) { ++ return ret; ++ } ++ ++ if (migration->pending_bytes == 0) { ++ qemu_put_be64(f, 0); ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ /* indicates data finished, goto complete phase */ ++ return 1; ++ } ++ } ++ ++ ret = vfio_save_buffer(f, vbasedev, &data_size); ++ if (ret) { ++ error_report("%s: vfio_save_buffer failed %s", vbasedev->name, ++ strerror(errno)); ++ return ret; ++ } ++ ++ 
qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ return ret; ++ } ++ ++ /* ++ * Reset pending_bytes as .save_live_pending is not called during savevm or ++ * snapshot case, in such case vfio_update_pending() at the start of this ++ * function updates pending_bytes. ++ */ ++ migration->pending_bytes = 0; ++ trace_vfio_save_iterate(vbasedev->name, data_size); ++ return 0; ++} ++ ++static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ uint64_t data_size; ++ int ret; ++ ++ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING, ++ VFIO_DEVICE_STATE_SAVING); ++ if (ret) { ++ error_report("%s: Failed to set state STOP and SAVING", ++ vbasedev->name); ++ return ret; ++ } ++ ++ ret = vfio_save_device_config_state(f, opaque); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = vfio_update_pending(vbasedev); ++ if (ret) { ++ return ret; ++ } ++ ++ while (migration->pending_bytes > 0) { ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); ++ ret = vfio_save_buffer(f, vbasedev, &data_size); ++ if (ret < 0) { ++ error_report("%s: Failed to save buffer", vbasedev->name); ++ return ret; ++ } ++ ++ if (data_size == 0) { ++ break; ++ } ++ ++ ret = vfio_update_pending(vbasedev); ++ if (ret) { ++ return ret; ++ } ++ } ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0); ++ if (ret) { ++ error_report("%s: Failed to set state STOPPED", vbasedev->name); ++ return ret; ++ } ++ ++ trace_vfio_save_complete_precopy(vbasedev->name); ++ return ret; ++} ++ + static SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, ++ .save_live_pending = vfio_save_pending, ++ .save_live_iterate = vfio_save_iterate, ++ .save_live_complete_precopy = 
vfio_save_complete_precopy, + }; + + /* ---------------------------------------------------------------------- */ +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 86c18def01..9a1c5e17d9 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -151,3 +151,9 @@ vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t + vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" + vfio_save_setup(const char *name) " (%s)" + vfio_save_cleanup(const char *name) " (%s)" ++vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64 ++vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64 ++vfio_save_device_config_state(const char *name) " (%s)" ++vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 ++vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" ++vfio_save_complete_precopy(const char *name) " (%s)" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index efff0590ae..c825524606 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -65,6 +65,7 @@ typedef struct VFIOMigration { + uint32_t device_state; + int vm_running; + Notifier migration_state; ++ uint64_t pending_bytes; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.27.0 + diff --git a/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch b/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch new file mode 100644 index 0000000000000000000000000000000000000000..e13a1daf40d518a550e8af5618b2d2b537cb43cc --- /dev/null +++ b/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch @@ -0,0 +1,55 @@ +From c1de789d89132b66243fbfe253f10764ce514a08 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 
15:06:12 +0530 +Subject: [PATCH] vfio: Add vfio_get_object callback to VFIODeviceOps + +Hook vfio_get_object callback for PCI devices. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +--- + hw/vfio/pci.c | 8 ++++++++ + include/hw/vfio/vfio-common.h | 1 + + 2 files changed, 9 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d7a4e1875c..de0d286fc9 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2388,10 +2388,18 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev) + } + } + ++static Object *vfio_pci_get_object(VFIODevice *vbasedev) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ ++ return OBJECT(vdev); ++} ++ + static VFIODeviceOps vfio_pci_ops = { + .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, + .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, + .vfio_eoi = vfio_intx_eoi, ++ .vfio_get_object = vfio_pci_get_object, + }; + + int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 93493891ba..771b6d59a3 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -119,6 +119,7 @@ struct VFIODeviceOps { + void (*vfio_compute_needs_reset)(VFIODevice *vdev); + int (*vfio_hot_reset_multi)(VFIODevice *vdev); + void (*vfio_eoi)(VFIODevice *vdev); ++ Object *(*vfio_get_object)(VFIODevice *vdev); + }; + + typedef struct VFIOGroup { +-- +2.27.0 + diff --git a/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch b/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch new file mode 100644 index 0000000000000000000000000000000000000000..6479a2550592f6eff10e6c244ed3648d887ed1a6 --- /dev/null +++ b/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch @@ -0,0 +1,182 @@ +From 3ac0647003d192579bcb6c1081b75d9c8ada78e0 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 
15:06:23 +0530 +Subject: [PATCH] vfio: Add vfio_listener_log_sync to mark dirty pages + +vfio_listener_log_sync gets list of dirty pages from container using +VFIO_IOMMU_GET_DIRTY_BITMAP ioctl and mark those pages dirty when all +devices are stopped and saving state. +Return early for the RAM block section of mapped MMIO region. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +[aw: fix error_report types, fix cpu_physical_memory_set_dirty_lebitmap() cast] +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 116 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 1 + + 2 files changed, 117 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 35168b8f3e..4d2828fc97 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -29,6 +29,7 @@ + #include "hw/vfio/vfio.h" + #include "exec/address-spaces.h" + #include "exec/memory.h" ++#include "exec/ram_addr.h" + #include "hw/hw.h" + #include "qemu/error-report.h" + #include "qemu/range.h" +@@ -36,6 +37,7 @@ + #include "sysemu/kvm.h" + #include "trace.h" + #include "qapi/error.h" ++#include "migration/migration.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -285,6 +287,39 @@ const MemoryRegionOps vfio_region_ops = { + }, + }; + ++/* ++ * Device state interfaces ++ */ ++ ++static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) ++{ ++ VFIOGroup *group; ++ VFIODevice *vbasedev; ++ MigrationState *ms = migrate_get_current(); ++ ++ if (!migration_is_setup_or_active(ms->state)) { ++ return false; ++ } ++ ++ QLIST_FOREACH(group, &container->group_list, container_next) { ++ QLIST_FOREACH(vbasedev, &group->device_list, next) { ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (!migration) { ++ return false; ++ } ++ ++ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && ++ !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ continue; ++ } else { ++ return false; ++ } ++ } ++ } ++ return true; ++} ++ + 
/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +@@ -794,9 +829,90 @@ static void vfio_listener_region_del(MemoryListener *listener, + } + } + ++static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, ++ uint64_t size, ram_addr_t ram_addr) ++{ ++ struct vfio_iommu_type1_dirty_bitmap *dbitmap; ++ struct vfio_iommu_type1_dirty_bitmap_get *range; ++ uint64_t pages; ++ int ret; ++ ++ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); ++ ++ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); ++ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; ++ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; ++ range->iova = iova; ++ range->size = size; ++ ++ /* ++ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of ++ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to ++ * TARGET_PAGE_SIZE. ++ */ ++ range->bitmap.pgsize = TARGET_PAGE_SIZE; ++ ++ pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS; ++ range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / ++ BITS_PER_BYTE; ++ range->bitmap.data = g_try_malloc0(range->bitmap.size); ++ if (!range->bitmap.data) { ++ ret = -ENOMEM; ++ goto err_out; ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); ++ if (ret) { ++ error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 ++ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, ++ (uint64_t)range->size, errno); ++ goto err_out; ++ } ++ ++ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data, ++ ram_addr, pages); ++ ++ trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size, ++ range->bitmap.size, ram_addr); ++err_out: ++ g_free(range->bitmap.data); ++ g_free(dbitmap); ++ ++ return ret; ++} ++ ++static int vfio_sync_dirty_bitmap(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ ram_addr_t ram_addr; ++ ++ ram_addr = memory_region_get_ram_addr(section->mr) + ++ 
section->offset_within_region; ++ ++ return vfio_get_dirty_bitmap(container, ++ TARGET_PAGE_ALIGN(section->offset_within_address_space), ++ int128_get64(section->size), ram_addr); ++} ++ ++static void vfio_listerner_log_sync(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ ++ if (vfio_listener_skipped_section(section) || ++ !container->dirty_pages_supported) { ++ return; ++ } ++ ++ if (vfio_devices_all_stopped_and_saving(container)) { ++ vfio_sync_dirty_bitmap(container, section); ++ } ++} ++ + static const MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, ++ .log_sync = vfio_listerner_log_sync, + }; + + static void vfio_listener_release(VFIOContainer *container) +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 4f08f5a633..4167f35d64 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -161,3 +161,4 @@ vfio_load_device_config_state(const char *name) " (%s)" + vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 + vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 + vfio_load_cleanup(const char *name) " (%s)" ++vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 +-- +2.27.0 + diff --git a/vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch b/vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch new file mode 100644 index 0000000000000000000000000000000000000000..289638a9e3c453dfe9fa9e863209ddbd5ea0489f --- /dev/null +++ b/vfio-Add-vfio_prereg_listener_global_log_start-stop-.patch @@ -0,0 +1,71 @@ +From 6aa770f4b83ca068d0c8f3102edda32666a8404d Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Tue, 11 May 2021 10:08:15 +0800 
+Subject: [PATCH] vfio: Add vfio_prereg_listener_global_log_start/stop in + nested stage + +In nested mode, we set up the stage 2 and stage 1 separately. In my +opinion, vfio_memory_prereg_listener is used for stage 2 and +vfio_memory_listener is used for stage 1. So it feels weird to call +the global_log_start/stop interface in vfio_memory_listener to switch +dirty tracking, although this won't cause any errors. Add +global_log_start/stop interface in vfio_memory_prereg_listener +can separate stage 2 from stage 1. + +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b5f9ba816e..fb7ca63748 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1239,6 +1239,17 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + ++ /* For nested mode, vfio_prereg_listener is used to start dirty tracking */ ++ if (container->iommu_type != VFIO_TYPE1_NESTING_IOMMU) { ++ vfio_set_dirty_page_tracking(container, true); ++ } ++} ++ ++static void vfio_prereg_listener_log_global_start(MemoryListener *listener) ++{ ++ VFIOContainer *container = ++ container_of(listener, VFIOContainer, prereg_listener); ++ + vfio_set_dirty_page_tracking(container, true); + } + +@@ -1246,6 +1257,17 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + ++ /* For nested mode, vfio_prereg_listener is used to stop dirty tracking */ ++ if (container->iommu_type != VFIO_TYPE1_NESTING_IOMMU) { ++ vfio_set_dirty_page_tracking(container, false); ++ } ++} ++ ++static void vfio_prereg_listener_log_global_stop(MemoryListener *listener) ++{ ++ VFIOContainer *container = ++ container_of(listener, VFIOContainer, prereg_listener); ++ + vfio_set_dirty_page_tracking(container, false); + } + +@@ 
-1614,6 +1636,8 @@ static const MemoryListener vfio_memory_listener = { + static MemoryListener vfio_memory_prereg_listener = { + .region_add = vfio_prereg_listener_region_add, + .region_del = vfio_prereg_listener_region_del, ++ .log_global_start = vfio_prereg_listener_log_global_start, ++ .log_global_stop = vfio_prereg_listener_log_global_stop, + .log_sync = vfio_prereg_listener_log_sync, + .log_clear = vfio_prereg_listener_log_clear, + }; +-- +2.27.0 + diff --git a/vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch b/vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch new file mode 100644 index 0000000000000000000000000000000000000000..e4da89bd477558ea9e58538c75b0c198d27e3d21 --- /dev/null +++ b/vfio-Add-vfio_prereg_listener_log_clear-to-re-enable.patch @@ -0,0 +1,84 @@ +From f959faa36fc100894a44f2e6cd7e02a183ba142a Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Sat, 31 Jul 2021 09:40:24 +0800 +Subject: [PATCH] vfio: Add vfio_prereg_listener_log_clear to re-enable mark + dirty pages + +When tracking dirty pages, we just need to pay attention to stage 2 +mappings. Legacy vfio_listener_log_clear cannot be used in nested +stage. This patch adds vfio_prereg_listener_log_clear to re-enable +dirty pages in nested mode. 
+ +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 40 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 39 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 6b00bd4c2f..b5f9ba816e 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1550,6 +1550,43 @@ static int vfio_physical_log_clear(VFIOContainer *container, + return ret; + } + ++static void vfio_prereg_listener_log_clear(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = ++ container_of(listener, VFIOContainer, prereg_listener); ++ ++ if (!memory_region_is_ram(section->mr)) { ++ return; ++ } ++ ++ vfio_physical_log_clear(container, section); ++} ++ ++static int vfio_clear_dirty_bitmap(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ if (memory_region_is_iommu(section->mr)) { ++ /* ++ * In nested mode, stage 2 (gpa->hpa) and stage 1 (giova->gpa) are ++ * set up separately. It is inappropriate to pass 'giova' to kernel ++ * to get dirty pages. We only need to focus on stage 2 mapping when ++ * marking dirty pages. ++ */ ++ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { ++ return 0; ++ } ++ ++ /* ++ * TODO: x86. With the log_clear() interface added, x86 may inplement ++ * its own method. 
++ */ ++ } ++ ++ /* Here we assume that memory_region_is_ram(section->mr) == true */ ++ return vfio_physical_log_clear(container, section); ++} ++ + static void vfio_listener_log_clear(MemoryListener *listener, + MemoryRegionSection *section) + { +@@ -1561,7 +1598,7 @@ static void vfio_listener_log_clear(MemoryListener *listener, + } + + if (vfio_devices_all_dirty_tracking(container)) { +- vfio_physical_log_clear(container, section); ++ vfio_clear_dirty_bitmap(container, section); + } + } + +@@ -1578,6 +1615,7 @@ static MemoryListener vfio_memory_prereg_listener = { + .region_add = vfio_prereg_listener_region_add, + .region_del = vfio_prereg_listener_region_del, + .log_sync = vfio_prereg_listener_log_sync, ++ .log_clear = vfio_prereg_listener_log_clear, + }; + + static void vfio_listener_release(VFIOContainer *container) +-- +2.27.0 + diff --git a/vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch b/vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch new file mode 100644 index 0000000000000000000000000000000000000000..77a0c8a14d29280b369466b1fa9b55dc62c26228 --- /dev/null +++ b/vfio-Add-vfio_prereg_listener_log_sync-in-nested-sta.patch @@ -0,0 +1,74 @@ +From 4c5350044ac2f61ab8088278b59eb6388ca49ff1 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Tue, 11 May 2021 10:08:14 +0800 +Subject: [PATCH] vfio: Add vfio_prereg_listener_log_sync in nested stage + +In nested mode, we set up the stage 2 (gpa->hpa)and stage 1 +(giova->gpa) separately by vfio_prereg_listener_region_add() +and vfio_listener_region_add(). So when marking dirty pages +we just need to pay attention to stage 2 mappings. + +Legacy vfio_listener_log_sync cannot be used in nested stage. +This patch adds vfio_prereg_listener_log_sync to mark dirty +pages in nested mode. 
+ +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 5176fd3a3d..6b00bd4c2f 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1317,6 +1317,22 @@ static int vfio_dma_sync_ram_section_dirty_bitmap(VFIOContainer *container, + int128_get64(section->size), ram_addr); + } + ++static void vfio_prereg_listener_log_sync(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = ++ container_of(listener, VFIOContainer, prereg_listener); ++ ++ if (!memory_region_is_ram(section->mr) || ++ !container->dirty_pages_supported) { ++ return; ++ } ++ ++ if (vfio_devices_all_dirty_tracking(container)) { ++ vfio_dma_sync_ram_section_dirty_bitmap(container, section); ++ } ++} ++ + typedef struct { + IOMMUNotifier n; + VFIOGuestIOMMU *giommu; +@@ -1361,6 +1377,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + ++ /* ++ * In nested mode, stage 2 (gpa->hpa) and stage 1 (giova->gpa) are ++ * set up separately. It is inappropriate to pass 'giova' to kernel ++ * to get dirty pages. We only need to focus on stage 2 mapping when ++ * marking dirty pages. 
++ */ ++ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { ++ return 0; ++ } ++ + QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { + if (MEMORY_REGION(giommu->iommu) == section->mr && + giommu->n.start == section->offset_within_region) { +@@ -1551,6 +1577,7 @@ static const MemoryListener vfio_memory_listener = { + static MemoryListener vfio_memory_prereg_listener = { + .region_add = vfio_prereg_listener_region_add, + .region_del = vfio_prereg_listener_region_del, ++ .log_sync = vfio_prereg_listener_log_sync, + }; + + static void vfio_listener_release(VFIOContainer *container) +-- +2.27.0 + diff --git a/vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch b/vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch new file mode 100644 index 0000000000000000000000000000000000000000..e65b3a8fe54f0fbc3fc5a00949632e67798ce788 --- /dev/null +++ b/vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch @@ -0,0 +1,63 @@ +From 8113fdcf0c1383ae5b9542563656bea3753d834e Mon Sep 17 00:00:00 2001 +From: Shenming Lu +Date: Wed, 10 Mar 2021 11:02:33 +0800 +Subject: [PATCH] vfio: Avoid disabling and enabling vectors repeatedly in VFIO + migration + +In VFIO migration resume phase and some guest startups, there are +already unmasked vectors in the vector table when calling +vfio_msix_enable(). So in order to avoid inefficiently disabling +and enabling vectors repeatedly, let's allocate all needed vectors +first and then enable these unmasked vectors one by one without +disabling. 
+ +Signed-off-by: Shenming Lu +Message-Id: <20210310030233.1133-4-lushenming@huawei.com> +Signed-off-by: Alex Williamson +--- + hw/vfio/pci.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index a637c35e7a..da7c740bce 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -563,6 +563,9 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) + + static void vfio_msix_enable(VFIOPCIDevice *vdev) + { ++ PCIDevice *pdev = &vdev->pdev; ++ unsigned int nr, max_vec = 0; ++ + vfio_disable_interrupts(vdev); + + vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->msix->entries); +@@ -581,11 +584,22 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) + * triggering to userspace, then immediately release the vector, leaving + * the physical device with no vectors enabled, but MSI-X enabled, just + * like the guest view. ++ * If there are already unmasked vectors (in migration resume phase and ++ * some guest startups) which will be enabled soon, we can allocate all ++ * of them here to avoid inefficiently disabling and enabling vectors ++ * repeatedly later. 
+ */ +- vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL); +- vfio_msix_vector_release(&vdev->pdev, 0); ++ if (!pdev->msix_function_masked) { ++ for (nr = 0; nr < msix_nr_vectors_allocated(pdev); nr++) { ++ if (!msix_is_masked(pdev, nr)) { ++ max_vec = nr; ++ } ++ } ++ } ++ vfio_msix_vector_do_use(pdev, max_vec, NULL, NULL); ++ vfio_msix_vector_release(pdev, max_vec); + +- if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, ++ if (msix_set_vector_notifiers(pdev, vfio_msix_vector_use, + vfio_msix_vector_release, NULL)) { + error_report("vfio: msix_set_vector_notifiers failed"); + } +-- +2.27.0 + diff --git a/vfio-Change-default-dirty-pages-tracking-behavior-du.patch b/vfio-Change-default-dirty-pages-tracking-behavior-du.patch new file mode 100644 index 0000000000000000000000000000000000000000..d34f0541c8589124e35a10bb220be59e64f21e53 --- /dev/null +++ b/vfio-Change-default-dirty-pages-tracking-behavior-du.patch @@ -0,0 +1,87 @@ +From 69d1cc17c0a77dbd0d8e811cfaa899b01bf2e5bc Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 23 Nov 2020 19:53:19 +0530 +Subject: [PATCH] vfio: Change default dirty pages tracking behavior during + migration + +By default dirty pages tracking is enabled during iterative phase +(pre-copy phase). +Added per device opt-out option 'x-pre-copy-dirty-page-tracking' to +disable dirty pages tracking during iterative phase. If the option +'x-pre-copy-dirty-page-tracking=off' is set for any VFIO device, dirty +pages tracking during iterative phase will be disabled. 
+ +Signed-off-by: Kirti Wankhede +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 11 +++++++---- + hw/vfio/pci.c | 3 +++ + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a86a4c4506..d9cc3509ef 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -310,7 +310,7 @@ bool vfio_mig_active(void) + return true; + } + +-static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) ++static bool vfio_devices_all_saving(VFIOContainer *container) + { + VFIOGroup *group; + VFIODevice *vbasedev; +@@ -328,8 +328,11 @@ static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) + return false; + } + +- if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && +- !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { ++ if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) ++ && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ return false; ++ } + continue; + } else { + return false; +@@ -1088,7 +1091,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, + return; + } + +- if (vfio_devices_all_stopped_and_saving(container)) { ++ if (vfio_devices_all_saving(container)) { + vfio_sync_dirty_bitmap(container, section); + } + } +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 2795b8bd12..3641ad0c5c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3180,6 +3180,9 @@ static void vfio_instance_init(Object *obj) + static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), ++ DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, ++ vbasedev.pre_copy_dirty_page_tracking, ++ ON_OFF_AUTO_ON), + DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice, + display, ON_OFF_AUTO_OFF), + 
DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0), +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 7398631d4c..475aa9fb40 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -130,6 +130,7 @@ typedef struct VFIODevice { + unsigned int flags; + VFIOMigration *migration; + Error *migration_blocker; ++ OnOffAuto pre_copy_dirty_page_tracking; + } VFIODevice; + + struct VFIODeviceOps { +-- +2.27.0 + diff --git a/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch b/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch new file mode 100644 index 0000000000000000000000000000000000000000..65949f079ed1eb2baea2e626fbd4d6140d23350c --- /dev/null +++ b/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch @@ -0,0 +1,162 @@ +From a400753d0f1a008367165aadf375abfe86a66ed7 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:24 +0530 +Subject: [PATCH] vfio: Dirty page tracking when vIOMMU is enabled + +When vIOMMU is enabled, register MAP notifier from log_sync when all +devices in container are in stop and copy phase of migration. Call replay +and get dirty pages from notifier callback. + +Suggested-by: Alex Williamson +Signed-off-by: Kirti Wankhede +Reviewed-by: Yan Zhao +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 88 +++++++++++++++++++++++++++++++++++++++++--- + hw/vfio/trace-events | 1 + + 2 files changed, 83 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4d2828fc97..8773b998ac 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -441,8 +441,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) + } + + /* Called with rcu_read_lock held. 
*/ +-static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr, +- bool *read_only) ++static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, ++ ram_addr_t *ram_addr, bool *read_only) + { + MemoryRegion *mr; + hwaddr xlat; +@@ -473,8 +473,17 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr, + return false; + } + +- *vaddr = memory_region_get_ram_ptr(mr) + xlat; +- *read_only = !writable || mr->readonly; ++ if (vaddr) { ++ *vaddr = memory_region_get_ram_ptr(mr) + xlat; ++ } ++ ++ if (ram_addr) { ++ *ram_addr = memory_region_get_ram_addr(mr) + xlat; ++ } ++ ++ if (read_only) { ++ *read_only = !writable || mr->readonly; ++ } + + return true; + } +@@ -484,7 +493,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); + VFIOContainer *container = giommu->container; + hwaddr iova = iotlb->iova + giommu->iommu_offset; +- bool read_only; + void *vaddr; + int ret; + +@@ -500,7 +508,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + rcu_read_lock(); + + if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { +- if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) { ++ bool read_only; ++ ++ if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) { + goto out; + } + /* +@@ -881,11 +891,77 @@ err_out: + return ret; + } + ++typedef struct { ++ IOMMUNotifier n; ++ VFIOGuestIOMMU *giommu; ++} vfio_giommu_dirty_notifier; ++ ++static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) ++{ ++ vfio_giommu_dirty_notifier *gdn = container_of(n, ++ vfio_giommu_dirty_notifier, n); ++ VFIOGuestIOMMU *giommu = gdn->giommu; ++ VFIOContainer *container = giommu->container; ++ hwaddr iova = iotlb->iova + giommu->iommu_offset; ++ ram_addr_t translated_addr; ++ ++ trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask); ++ ++ if (iotlb->target_as != &address_space_memory) { ++ error_report("Wrong target AS \"%s\", only system memory 
is allowed", ++ iotlb->target_as->name ? iotlb->target_as->name : "none"); ++ return; ++ } ++ ++ rcu_read_lock(); ++ if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { ++ int ret; ++ ++ ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1, ++ translated_addr); ++ if (ret) { ++ error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " ++ "0x%"HWADDR_PRIx") = %d (%m)", ++ container, iova, ++ iotlb->addr_mask + 1, ret); ++ } ++ } ++ rcu_read_unlock(); ++} ++ + static int vfio_sync_dirty_bitmap(VFIOContainer *container, + MemoryRegionSection *section) + { + ram_addr_t ram_addr; + ++ if (memory_region_is_iommu(section->mr)) { ++ VFIOGuestIOMMU *giommu; ++ ++ QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { ++ if (MEMORY_REGION(giommu->iommu) == section->mr && ++ giommu->n.start == section->offset_within_region) { ++ Int128 llend; ++ vfio_giommu_dirty_notifier gdn = { .giommu = giommu }; ++ int idx = memory_region_iommu_attrs_to_index(giommu->iommu, ++ MEMTXATTRS_UNSPECIFIED); ++ ++ llend = int128_add(int128_make64(section->offset_within_region), ++ section->size); ++ llend = int128_sub(llend, int128_one()); ++ ++ iommu_notifier_init(&gdn.n, ++ vfio_iommu_map_dirty_notify, ++ IOMMU_NOTIFIER_MAP, ++ section->offset_within_region, ++ int128_get64(llend), ++ idx); ++ memory_region_iommu_replay(giommu->iommu, &gdn.n); ++ break; ++ } ++ } ++ return 0; ++ } ++ + ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 4167f35d64..575ebde6e0 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -162,3 +162,4 @@ vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 + vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 + vfio_load_cleanup(const char *name) " (%s)" + vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t 
bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 ++vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 +-- +2.27.0 + diff --git a/vfio-Fix-unregister-SaveVMHandler-in-vfio_migration_.patch b/vfio-Fix-unregister-SaveVMHandler-in-vfio_migration_.patch new file mode 100644 index 0000000000000000000000000000000000000000..47d59923070d7827152f59a60304ef708bcc1c62 --- /dev/null +++ b/vfio-Fix-unregister-SaveVMHandler-in-vfio_migration_.patch @@ -0,0 +1,36 @@ +From 8dc6e7ccc5712aee457ffb1f6cf1bf3f80e778d5 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Thu, 27 May 2021 20:31:01 +0800 +Subject: [PATCH] vfio: Fix unregister SaveVMHandler in vfio_migration_finalize + +In the vfio_migration_init(), the SaveVMHandler is registered for +VFIO device. But it lacks the operation of 'unregister'. It will +lead to 'Segmentation fault (core dumped)' in +qemu_savevm_state_setup(), if performing live migration after a +VFIO device is hot deleted. 
+ +Fixes: cd5b58f2ba (vfio: Register SaveVMHandlers for VFIO device) +Reported-by: Qixin Gan +Signed-off-by: Kunkun Jiang +Message-Id: <20210527123101.289-1-jiangkunkun@huawei.com> +Reviewed-by: Kirti Wankhede +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index f1f006d584..d9e0e12824 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -893,6 +893,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) + + remove_migration_state_change_notifier(&migration->migration_state); + qemu_del_vm_change_state_handler(migration->vm_state); ++ unregister_savevm(vbasedev->dev, "vfio", vbasedev); + vfio_migration_exit(vbasedev); + } + +-- +2.27.0 + diff --git a/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch b/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch new file mode 100644 index 0000000000000000000000000000000000000000..6a2324b57811f6d375bbc7f795dc07f78baa42e2 --- /dev/null +++ b/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch @@ -0,0 +1,41 @@ +From 094aca3a87e63a0e6ae01b22f382c21dd91bb03e Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Fri, 4 Dec 2020 09:42:40 +0800 +Subject: [PATCH] vfio: Fix vfio_listener_log_sync function name typo + +There is an obvious typo in the function name of the .log_sync() callback. +Spell it correctly.
+ +Signed-off-by: Zenghui Yu +Message-Id: <20201204014240.772-1-yuzenghui@huawei.com> +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index d9cc3509ef..ebd701faa0 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1081,7 +1081,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + int128_get64(section->size), ram_addr); + } + +-static void vfio_listerner_log_sync(MemoryListener *listener, ++static void vfio_listener_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); +@@ -1099,7 +1099,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, + static const MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, +- .log_sync = vfio_listerner_log_sync, ++ .log_sync = vfio_listener_log_sync, + }; + + static void vfio_listener_release(VFIOContainer *container) +-- +2.27.0 + diff --git a/vfio-Force-nested-if-iommu-requires-it.patch b/vfio-Force-nested-if-iommu-requires-it.patch new file mode 100644 index 0000000000000000000000000000000000000000..6a6b9da3f1ebd6c44f6a298a9c456351a8a93fcd --- /dev/null +++ b/vfio-Force-nested-if-iommu-requires-it.patch @@ -0,0 +1,100 @@ +From e4122a95a30cd58e1cd6e1742928e68aa94fd7ee Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 28 Aug 2018 16:16:20 +0200 +Subject: [PATCH] vfio: Force nested if iommu requires it + +In case we detect the address space is translated by +a virtual IOMMU which requires HW nested paging to +integrate with VFIO, let's set up the container with +the VFIO_TYPE1_NESTING_IOMMU iommu_type. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 36 ++++++++++++++++++++++++++++-------- + 1 file changed, 28 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index fefa2ccfdf..c78b58d365 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1683,27 +1683,38 @@ static void vfio_put_address_space(VFIOAddressSpace *space) + * vfio_get_iommu_type - selects the richest iommu_type (v2 first) + */ + static int vfio_get_iommu_type(VFIOContainer *container, ++ bool want_nested, + Error **errp) + { +- int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, ++ int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU, ++ VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU, + VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU }; +- int i; ++ int i, ret = -EINVAL; + + for (i = 0; i < ARRAY_SIZE(iommu_types); i++) { + if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) { +- return iommu_types[i]; ++ if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) { ++ continue; ++ } ++ ret = iommu_types[i]; ++ break; + } + } +- error_setg(errp, "No available IOMMU models"); +- return -EINVAL; ++ if (ret < 0) { ++ error_setg(errp, "No available IOMMU models"); ++ } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) { ++ error_setg(errp, "Nested mode requested but not supported"); ++ ret = -EINVAL; ++ } ++ return ret; + } + + static int vfio_init_container(VFIOContainer *container, int group_fd, +- Error **errp) ++ bool want_nested, Error **errp) + { + int iommu_type, dirty_log_manual_clear, ret; + +- iommu_type = vfio_get_iommu_type(container, errp); ++ iommu_type = vfio_get_iommu_type(container, want_nested, errp); + if (iommu_type < 0) { + return iommu_type; + } +@@ -1815,6 +1826,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + VFIOContainer *container; + int ret, fd; + VFIOAddressSpace *space; ++ IOMMUMemoryRegion *iommu_mr; ++ bool nested = false; ++ ++ if 
(memory_region_is_iommu(as->root)) { ++ iommu_mr = IOMMU_MEMORY_REGION(as->root); ++ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED, ++ (void *)&nested); ++ } + + space = vfio_get_address_space(as); + +@@ -1879,13 +1898,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + QLIST_INIT(&container->hostwin_list); + QLIST_INIT(&container->dma_list); + +- ret = vfio_init_container(container, group->fd, errp); ++ ret = vfio_init_container(container, group->fd, nested, errp); + if (ret) { + goto free_container_exit; + } + trace_vfio_connect_new_container(group->groupid, container->fd); + + switch (container->iommu_type) { ++ case VFIO_TYPE1_NESTING_IOMMU: + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + { +-- +2.27.0 + diff --git a/vfio-Get-migration-capability-flags-for-container.patch b/vfio-Get-migration-capability-flags-for-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..88b9bb7e6fdfe4f4a75808bbdcb5ec45d354ae15 --- /dev/null +++ b/vfio-Get-migration-capability-flags-for-container.patch @@ -0,0 +1,186 @@ +From fc49c9cbf2deba53370f48ad9db2adc5f6ceb3ba Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:21 +0530 +Subject: [PATCH] vfio: Get migration capability flags for container + +Added helper functions to get IOMMU info capability chain. +Added function to get migration capability information from that +capability chain for IOMMU container. + +Similar change was proposed earlier: +https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html + +Disable migration for devices if IOMMU module doesn't support migration +capability. 
+ +Signed-off-by: Kirti Wankhede +Cc: Shameer Kolothum +Cc: Eric Auger +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 90 +++++++++++++++++++++++++++++++---- + hw/vfio/migration.c | 7 ++- + include/hw/vfio/vfio-common.h | 3 ++ + 3 files changed, 91 insertions(+), 9 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4c32b1bb99..35168b8f3e 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1210,6 +1210,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + return 0; + } + ++static int vfio_get_iommu_info(VFIOContainer *container, ++ struct vfio_iommu_type1_info **info) ++{ ++ ++ size_t argsz = sizeof(struct vfio_iommu_type1_info); ++ ++ *info = g_new0(struct vfio_iommu_type1_info, 1); ++again: ++ (*info)->argsz = argsz; ++ ++ if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { ++ g_free(*info); ++ *info = NULL; ++ return -errno; ++ } ++ ++ if (((*info)->argsz > argsz)) { ++ argsz = (*info)->argsz; ++ *info = g_realloc(*info, argsz); ++ goto again; ++ } ++ ++ return 0; ++} ++ ++static struct vfio_info_cap_header * ++vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) ++{ ++ struct vfio_info_cap_header *hdr; ++ void *ptr = info; ++ ++ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { ++ return NULL; ++ } ++ ++ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { ++ if (hdr->id == id) { ++ return hdr; ++ } ++ } ++ ++ return NULL; ++} ++ ++static void vfio_get_iommu_info_migration(VFIOContainer *container, ++ struct vfio_iommu_type1_info *info) ++{ ++ struct vfio_info_cap_header *hdr; ++ struct vfio_iommu_type1_info_cap_migration *cap_mig; ++ ++ hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); ++ if (!hdr) { ++ return; ++ } ++ ++ cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, ++ header); ++ ++ /* ++ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of ++ * TARGET_PAGE_SIZE to mark those dirty. 
++ */ ++ if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) { ++ container->dirty_pages_supported = true; ++ container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; ++ container->dirty_pgsizes = cap_mig->pgsize_bitmap; ++ } ++} ++ + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { +@@ -1273,6 +1342,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->space = space; + container->fd = fd; ++ container->dirty_pages_supported = false; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->hostwin_list); + +@@ -1285,7 +1355,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + { +- struct vfio_iommu_type1_info info; ++ struct vfio_iommu_type1_info *info; + + /* + * FIXME: This assumes that a Type1 IOMMU can map any 64-bit +@@ -1294,15 +1364,19 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + * existing Type1 IOMMUs generally support any IOVA we're + * going to actually try in practice. 
+ */ +- info.argsz = sizeof(info); +- ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); +- /* Ignore errors */ +- if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { ++ ret = vfio_get_iommu_info(container, &info); ++ ++ if (ret || !(info->flags & VFIO_IOMMU_INFO_PGSIZES)) { + /* Assume 4k IOVA page size */ +- info.iova_pgsizes = 4096; ++ info->iova_pgsizes = 4096; + } +- vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); +- container->pgsizes = info.iova_pgsizes; ++ vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); ++ container->pgsizes = info->iova_pgsizes; ++ ++ if (!ret) { ++ vfio_get_iommu_info_migration(container, info); ++ } ++ g_free(info); + break; + } + case VFIO_SPAPR_TCE_v2_IOMMU: +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 954c064435..0d2bd9e5cd 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -832,9 +832,14 @@ err: + + int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + { ++ VFIOContainer *container = vbasedev->group->container; + struct vfio_region_info *info = NULL; + Error *local_err = NULL; +- int ret; ++ int ret = -ENOTSUP; ++ ++ if (!container->dirty_pages_supported) { ++ goto add_blocker; ++ } + + ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, + VFIO_REGION_SUBTYPE_MIGRATION, &info); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index c825524606..8fd0212264 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -84,6 +84,9 @@ typedef struct VFIOContainer { + unsigned iommu_type; + int error; + bool initialized; ++ bool dirty_pages_supported; ++ uint64_t dirty_pgsizes; ++ uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; +-- +2.27.0 + diff --git a/vfio-Helper-to-get-IRQ-info-including-capabilities.patch b/vfio-Helper-to-get-IRQ-info-including-capabilities.patch new file mode 100644 index 
0000000000000000000000000000000000000000..16f16d32faa4d793056700a52ef33b23716801c1 --- /dev/null +++ b/vfio-Helper-to-get-IRQ-info-including-capabilities.patch @@ -0,0 +1,178 @@ +From 43fd039dcfee221eb3f86a2cf7deb287cc04e5ad Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 20 Jun 2019 16:39:57 +0200 +Subject: [PATCH] vfio: Helper to get IRQ info including capabilities + +As done for vfio regions, add helpers to retrieve irq info +including their optional capabilities. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 97 +++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 1 + + include/hw/vfio/vfio-common.h | 7 +++ + 3 files changed, 105 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index db9af3b0e5..98dc9e6f84 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1565,6 +1565,25 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) + return NULL; + } + ++struct vfio_info_cap_header * ++vfio_get_irq_info_cap(struct vfio_irq_info *info, uint16_t id) ++{ ++ struct vfio_info_cap_header *hdr; ++ void *ptr = info; ++ ++ if (!(info->flags & VFIO_IRQ_INFO_FLAG_CAPS)) { ++ return NULL; ++ } ++ ++ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { ++ if (hdr->id == id) { ++ return hdr; ++ } ++ } ++ ++ return NULL; ++} ++ + static int vfio_setup_region_sparse_mmaps(VFIORegion *region, + struct vfio_region_info *info) + { +@@ -2499,6 +2518,33 @@ retry: + return 0; + } + ++int vfio_get_irq_info(VFIODevice *vbasedev, int index, ++ struct vfio_irq_info **info) ++{ ++ size_t argsz = sizeof(struct vfio_irq_info); ++ ++ *info = g_malloc0(argsz); ++ ++ (*info)->index = index; ++retry: ++ (*info)->argsz = argsz; ++ ++ if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, *info)) { ++ g_free(*info); ++ *info = NULL; ++ return -errno; ++ } ++ ++ if ((*info)->argsz > argsz) { ++ argsz = (*info)->argsz; ++ *info = g_realloc(*info, argsz); ++ ++ goto retry; ++ } ++ ++ return 0; 
++} ++ + int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, + uint32_t subtype, struct vfio_region_info **info) + { +@@ -2534,6 +2580,42 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, + return -ENODEV; + } + ++int vfio_get_dev_irq_info(VFIODevice *vbasedev, uint32_t type, ++ uint32_t subtype, struct vfio_irq_info **info) ++{ ++ int i; ++ ++ for (i = 0; i < vbasedev->num_irqs; i++) { ++ struct vfio_info_cap_header *hdr; ++ struct vfio_irq_info_cap_type *cap_type; ++ ++ if (vfio_get_irq_info(vbasedev, i, info)) { ++ continue; ++ } ++ ++ hdr = vfio_get_irq_info_cap(*info, VFIO_IRQ_INFO_CAP_TYPE); ++ if (!hdr) { ++ g_free(*info); ++ continue; ++ } ++ ++ cap_type = container_of(hdr, struct vfio_irq_info_cap_type, header); ++ ++ trace_vfio_get_dev_irq(vbasedev->name, i, ++ cap_type->type, cap_type->subtype); ++ ++ if (cap_type->type == type && cap_type->subtype == subtype) { ++ return 0; ++ } ++ ++ g_free(*info); ++ } ++ ++ *info = NULL; ++ return -ENODEV; ++} ++ ++ + bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) + { + struct vfio_region_info *info = NULL; +@@ -2549,6 +2631,21 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) + return ret; + } + ++bool vfio_has_irq_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) ++{ ++ struct vfio_region_info *info = NULL; ++ bool ret = false; ++ ++ if (!vfio_get_region_info(vbasedev, region, &info)) { ++ if (vfio_get_region_info_cap(info, cap_type)) { ++ ret = true; ++ } ++ g_free(info); ++ } ++ ++ return ret; ++} ++ + /* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 247b72c1eb..54e10046f5 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -117,6 +117,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re + vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d 
sparse mmap entries" + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" ++vfio_get_dev_irq(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" + vfio_dma_unmap_overflow_workaround(void) "" + vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d" + vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b175158138..a82962ab16 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -238,6 +238,13 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type, + bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type); + struct vfio_info_cap_header * + vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id); ++int vfio_get_irq_info(VFIODevice *vbasedev, int index, ++ struct vfio_irq_info **info); ++int vfio_get_dev_irq_info(VFIODevice *vbasedev, uint32_t type, ++ uint32_t subtype, struct vfio_irq_info **info); ++bool vfio_has_irq_cap(VFIODevice *vbasedev, int irq, uint16_t cap_type); ++struct vfio_info_cap_header * ++vfio_get_irq_info_cap(struct vfio_irq_info *info, uint16_t id); + #endif + extern const MemoryListener vfio_prereg_listener; + +-- +2.27.0 + diff --git a/vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch b/vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch new file mode 100644 index 0000000000000000000000000000000000000000..124587d1e081a65740786aefbd1033d895678245 --- /dev/null +++ b/vfio-Introduce-helpers-to-DMA-map-unmap-a-RAM-sectio.patch @@ -0,0 +1,261 @@ +From eb3bfdb61025efe2891ce6732b8829a48dd75e2d Mon Sep 17 00:00:00 
2001 +From: Eric Auger +Date: Thu, 30 Aug 2018 15:04:25 +0200 +Subject: [PATCH] vfio: Introduce helpers to DMA map/unmap a RAM section + +Let's introduce two helpers that allow to DMA map/unmap a RAM +section. Those helpers will be called for nested stage setup in +another call site. Also the vfio_listener_region_add/del() +structure may be clearer. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 187 +++++++++++++++++++++++++++---------------- + hw/vfio/trace-events | 4 +- + 2 files changed, 119 insertions(+), 72 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a8db784ac5..8837d33c57 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -709,13 +709,126 @@ hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end) + return NULL; + } + ++static int vfio_dma_map_ram_section(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ VFIOHostDMAWindow *hostwin; ++ Int128 llend, llsize; ++ hwaddr iova, end; ++ void *vaddr; ++ int ret; ++ ++ assert(memory_region_is_ram(section->mr)); ++ ++ iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); ++ llend = int128_make64(section->offset_within_address_space); ++ llend = int128_add(llend, section->size); ++ llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); ++ end = int128_get64(int128_sub(llend, int128_one())); ++ ++ vaddr = memory_region_get_ram_ptr(section->mr) + ++ section->offset_within_region + ++ (iova - section->offset_within_address_space); ++ ++ hostwin = hostwin_from_range(container, iova, end); ++ if (!hostwin) { ++ error_report("vfio: IOMMU Container %p can't map guest IOVA region" ++ " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, ++ container, iova, end); ++ return -EFAULT; ++ } ++ ++ trace_vfio_dma_map_ram(iova, end, vaddr); ++ ++ llsize = int128_sub(llend, int128_make64(iova)); ++ ++ if (memory_region_is_ram_device(section->mr)) { ++ hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; ++ ++ if ((iova & pgmask) || 
(int128_get64(llsize) & pgmask)) { ++ trace_vfio_listener_region_add_no_dma_map( ++ memory_region_name(section->mr), ++ section->offset_within_address_space, ++ int128_getlo(section->size), ++ pgmask + 1); ++ return 0; ++ } ++ } ++ ++ ret = vfio_dma_map(container, iova, int128_get64(llsize), ++ vaddr, section->readonly); ++ if (ret) { ++ error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " ++ "0x%"HWADDR_PRIx", %p) = %d (%m)", ++ container, iova, int128_get64(llsize), vaddr, ret); ++ if (memory_region_is_ram_device(section->mr)) { ++ /* Allow unexpected mappings not to be fatal for RAM devices */ ++ return 0; ++ } ++ return ret; ++ } ++ return 0; ++} ++ ++static void vfio_dma_unmap_ram_section(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ Int128 llend, llsize; ++ hwaddr iova, end; ++ bool try_unmap = true; ++ int ret; ++ ++ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); ++ llend = int128_make64(section->offset_within_address_space); ++ llend = int128_add(llend, section->size); ++ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); ++ ++ if (int128_ge(int128_make64(iova), llend)) { ++ return; ++ } ++ end = int128_get64(int128_sub(llend, int128_one())); ++ ++ llsize = int128_sub(llend, int128_make64(iova)); ++ ++ trace_vfio_dma_unmap_ram(iova, end); ++ ++ if (memory_region_is_ram_device(section->mr)) { ++ hwaddr pgmask; ++ VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end); ++ ++ assert(hostwin); /* or region_add() would have failed */ ++ ++ pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; ++ try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); ++ } ++ ++ if (try_unmap) { ++ if (int128_eq(llsize, int128_2_64())) { ++ /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ ++ llsize = int128_rshift(llsize, 1); ++ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); ++ if (ret) { ++ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ "0x%"HWADDR_PRIx") = %d (%m)", ++ container, iova, int128_get64(llsize), ret); ++ } ++ iova += int128_get64(llsize); ++ } ++ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); ++ if (ret) { ++ error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ "0x%"HWADDR_PRIx") = %d (%m)", ++ container, iova, int128_get64(llsize), ret); ++ } ++ } ++} ++ + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + hwaddr iova, end; +- Int128 llend, llsize; +- void *vaddr; ++ Int128 llend; + int ret; + VFIOHostDMAWindow *hostwin; + +@@ -842,38 +955,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + } + + /* Here we assume that memory_region_is_ram(section->mr)==true */ +- +- vaddr = memory_region_get_ram_ptr(section->mr) + +- section->offset_within_region + +- (iova - section->offset_within_address_space); +- +- trace_vfio_listener_region_add_ram(iova, end, vaddr); +- +- llsize = int128_sub(llend, int128_make64(iova)); +- +- if (memory_region_is_ram_device(section->mr)) { +- hwaddr pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; +- +- if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { +- trace_vfio_listener_region_add_no_dma_map( +- memory_region_name(section->mr), +- section->offset_within_address_space, +- int128_getlo(section->size), +- pgmask + 1); +- return; +- } +- } +- +- ret = vfio_dma_map(container, iova, int128_get64(llsize), +- vaddr, section->readonly); +- if (ret) { +- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " +- "0x%"HWADDR_PRIx", %p) = %d (%m)", +- container, iova, int128_get64(llsize), vaddr, ret); +- if (memory_region_is_ram_device(section->mr)) { +- /* Allow unexpected mappings not to be fatal for RAM 
devices */ +- return; +- } ++ if (vfio_dma_map_ram_section(container, section)) { + goto fail; + } + +@@ -902,10 +984,6 @@ static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); +- hwaddr iova, end; +- Int128 llend, llsize; +- int ret; +- bool try_unmap = true; + + if (vfio_listener_skipped_section(section)) { + trace_vfio_listener_region_del_skip( +@@ -945,38 +1023,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + */ + } + +- iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); +- llend = int128_make64(section->offset_within_address_space); +- llend = int128_add(llend, section->size); +- llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); +- +- if (int128_ge(int128_make64(iova), llend)) { +- return; +- } +- end = int128_get64(int128_sub(llend, int128_one())); +- +- llsize = int128_sub(llend, int128_make64(iova)); +- +- trace_vfio_listener_region_del(iova, end); +- +- if (memory_region_is_ram_device(section->mr)) { +- hwaddr pgmask; +- VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end); +- +- assert(hostwin); /* or region_add() would have failed */ +- +- pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; +- try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); +- } +- +- if (try_unmap) { +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); +- if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " +- "0x%"HWADDR_PRIx") = %d (%m)", +- container, iova, int128_get64(llsize), ret); +- } +- } ++ vfio_dma_unmap_ram_section(container, section); + + memory_region_unref(section->mr); + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 561dc6e758..9b6c7ca61b 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -97,10 +97,10 @@ vfio_iommu_map_notify(const char *op, uint64_t iova_start, uint64_t iova_end) "i + 
vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add 0x%"PRIx64" - 0x%"PRIx64 + vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" + vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 +-vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" ++vfio_dma_map_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" + vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" + vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 +-vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 ++vfio_dma_unmap_ram(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_disconnect_container(int fd) "close container->fd=%d" + vfio_connect_existing_container(int groupid, int container_fd) "group=%d existing container fd=%d" + vfio_connect_new_container(int groupid, int container_fd) "group=%d new container fd=%d" +-- +2.27.0 + diff --git a/vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch b/vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch new file mode 100644 index 0000000000000000000000000000000000000000..274a0c08a92bfa268f3fab8a5d7842cdbab9d273 --- /dev/null +++ b/vfio-Introduce-helpers-to-mark-dirty-pages-of-a-RAM-.patch @@ -0,0 +1,64 @@ +From ff9c1f7e3e17cc2afe1b2dfa545065e91941db8b Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Tue, 11 May 2021 10:08:13 +0800 +Subject: [PATCH] vfio: Introduce helpers to mark dirty pages of a RAM section + +Extract part of the code from vfio_sync_dirty_bitmap to form a +new helper, which allows to mark dirty pages of a RAM 
section. +This helper will be called for nested stage. + +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 21a866e545..5176fd3a3d 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1304,6 +1304,19 @@ err_out: + return ret; + } + ++static int vfio_dma_sync_ram_section_dirty_bitmap(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ ram_addr_t ram_addr; ++ ++ ram_addr = memory_region_get_ram_addr(section->mr) + ++ section->offset_within_region; ++ ++ return vfio_get_dirty_bitmap(container, ++ REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), ++ int128_get64(section->size), ram_addr); ++} ++ + typedef struct { + IOMMUNotifier n; + VFIOGuestIOMMU *giommu; +@@ -1345,8 +1358,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + static int vfio_sync_dirty_bitmap(VFIOContainer *container, + MemoryRegionSection *section) + { +- ram_addr_t ram_addr; +- + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + +@@ -1375,12 +1386,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + return 0; + } + +- ram_addr = memory_region_get_ram_addr(section->mr) + +- section->offset_within_region; +- +- return vfio_get_dirty_bitmap(container, +- REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), +- int128_get64(section->size), ram_addr); ++ return vfio_dma_sync_ram_section_dirty_bitmap(container, section); + } + + static void vfio_listener_log_sync(MemoryListener *listener, +-- +2.27.0 + diff --git a/vfio-Introduce-hostwin_from_range-helper.patch b/vfio-Introduce-hostwin_from_range-helper.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9a7099de2a6eeaa9265be66c8818ffc852e6583 --- /dev/null +++ b/vfio-Introduce-hostwin_from_range-helper.patch @@ -0,0 +1,89 @@ +From 25336cd596ff551293f1be6e108ad9277d80be0f Mon Sep 17 
00:00:00 2001 +From: Eric Auger +Date: Fri, 22 Mar 2019 18:05:23 +0100 +Subject: [PATCH] vfio: Introduce hostwin_from_range helper + +Let's introduce a hostwin_from_range() helper that returns the +hostwin encapsulating an IOVA range or NULL if none is found. + +This improves the readibility of callers and removes the usage +of hostwin_found. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 36 +++++++++++++++++------------------- + 1 file changed, 17 insertions(+), 19 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index c78b58d365..a8db784ac5 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -696,6 +696,19 @@ out: + rcu_read_unlock(); + } + ++static VFIOHostDMAWindow * ++hostwin_from_range(VFIOContainer *container, hwaddr iova, hwaddr end) ++{ ++ VFIOHostDMAWindow *hostwin; ++ ++ QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { ++ return hostwin; ++ } ++ } ++ return NULL; ++} ++ + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +@@ -705,7 +718,6 @@ static void vfio_listener_region_add(MemoryListener *listener, + void *vaddr; + int ret; + VFIOHostDMAWindow *hostwin; +- bool hostwin_found; + + if (vfio_listener_skipped_section(section)) { + trace_vfio_listener_region_add_skip( +@@ -783,15 +795,8 @@ static void vfio_listener_region_add(MemoryListener *listener, + #endif + } + +- hostwin_found = false; +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { +- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { +- hostwin_found = true; +- break; +- } +- } +- +- if (!hostwin_found) { ++ hostwin = hostwin_from_range(container, iova, end); ++ if (!hostwin) { + error_report("vfio: IOMMU container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, + container, iova, end); +@@ -956,16 +961,9 @@ static void 
vfio_listener_region_del(MemoryListener *listener, + + if (memory_region_is_ram_device(section->mr)) { + hwaddr pgmask; +- VFIOHostDMAWindow *hostwin; +- bool hostwin_found = false; ++ VFIOHostDMAWindow *hostwin = hostwin_from_range(container, iova, end); + +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { +- if (hostwin->min_iova <= iova && end <= hostwin->max_iova) { +- hostwin_found = true; +- break; +- } +- } +- assert(hostwin_found); /* or region_add() would have failed */ ++ assert(hostwin); /* or region_add() would have failed */ + + pgmask = (1ULL << ctz64(hostwin->iova_pgsizes)) - 1; + try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); +-- +2.27.0 + diff --git a/vfio-Maintain-DMA-mapping-range-for-the-container.patch b/vfio-Maintain-DMA-mapping-range-for-the-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..901a5e38ea78a2c490875611f12658151da661b9 --- /dev/null +++ b/vfio-Maintain-DMA-mapping-range-for-the-container.patch @@ -0,0 +1,191 @@ +From 90a6a1ec65d55d27faf79341b2dd9418d99da187 Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:04 +0800 +Subject: [PATCH] vfio: Maintain DMA mapping range for the container + +When synchronizing dirty bitmap from kernel VFIO we do it in a +per-iova-range fashion and we allocate the userspace bitmap for each of the +ioctl. This patch introduces `struct VFIODMARange` to describe a range of +the given DMA mapping with respect to a VFIO_IOMMU_MAP_DMA operation, and +make the bitmap cache of this range be persistent so that we don't need to +g_try_malloc0() every time. Note that the new structure is almost a copy of +`struct vfio_iommu_type1_dma_map` but only internally used by QEMU. 
+ +More importantly, the cached per-iova-range dirty bitmap will be further +used when we want to add support for the CLEAR_BITMAP and this cached +bitmap will be used to guarantee we don't clear any unknown dirty bits +otherwise that can be a severe data loss issue for migration code. + +It's pretty intuitive to maintain a bitmap per container since we perform +log_sync at this granule. But I don't know how to deal with things like +memory hot-{un}plug, sparse DMA mappings, etc. Suggestions welcome. + +* yet something to-do: + - can't work with guest viommu + - no locks + - etc + +[ The idea and even the commit message are largely inherited from kvm side. + See commit 9f4bf4baa8b820c7930e23c9566c9493db7e1d25. ] + +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 62 +++++++++++++++++++++++++++++++---- + include/hw/vfio/vfio-common.h | 9 +++++ + 2 files changed, 65 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 245e32df5b..c33c4c539d 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -420,6 +420,29 @@ unmap_exit: + return ret; + } + ++static VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++ hwaddr start_addr, hwaddr size) ++{ ++ VFIODMARange *qrange; ++ ++ QLIST_FOREACH(qrange, &container->dma_list, next) { ++ if (qrange->iova == start_addr && qrange->size == size) { ++ return qrange; ++ } ++ } ++ return NULL; ++} ++ ++static void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange) ++{ ++ uint64_t pages, size; ++ ++ pages = REAL_HOST_PAGE_ALIGN(qrange->size) / qemu_real_host_page_size; ++ size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / BITS_PER_BYTE; ++ ++ qrange->bitmap = g_malloc0(size); ++} ++ + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +@@ -433,12 +456,29 @@ static int vfio_dma_unmap(VFIOContainer *container, + .iova = iova, + .size = size, + }; ++ VFIODMARange *qrange; + + if (iotlb && 
container->dirty_pages_supported && + vfio_devices_all_running_and_saving(container)) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } + ++ /* ++ * unregister the DMA range ++ * ++ * It seems that the memory layer will give us the same section as the one ++ * used in region_add(). Otherwise it'll be complicated to manipulate the ++ * bitmap across region_{add,del}. Is there any guarantee? ++ * ++ * But there is really not such a restriction on the kernel interface ++ * (VFIO_IOMMU_DIRTY_PAGES_FLAG_{UN}MAP_DMA, etc). ++ */ ++ qrange = vfio_lookup_match_range(container, iova, size); ++ assert(qrange); ++ g_free(qrange->bitmap); ++ QLIST_REMOVE(qrange, next); ++ g_free(qrange); ++ + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + /* + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c +@@ -475,6 +515,14 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, + .iova = iova, + .size = size, + }; ++ VFIODMARange *qrange; ++ ++ qrange = g_malloc0(sizeof(*qrange)); ++ qrange->iova = iova; ++ qrange->size = size; ++ QLIST_INSERT_HEAD(&container->dma_list, qrange, next); ++ /* XXX allocate the dirty bitmap on demand */ ++ vfio_dma_range_init_dirty_bitmap(qrange); + + if (!readonly) { + map.flags |= VFIO_DMA_MAP_FLAG_WRITE; +@@ -986,9 +1034,14 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + { + struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; ++ VFIODMARange *qrange; + uint64_t pages; + int ret; + ++ qrange = vfio_lookup_match_range(container, iova, size); ++ /* the same as vfio_dma_unmap() */ ++ assert(qrange); ++ + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); + + dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); +@@ -1007,11 +1060,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size; + range->bitmap.size = ROUND_UP(pages, 
sizeof(__u64) * BITS_PER_BYTE) / + BITS_PER_BYTE; +- range->bitmap.data = g_try_malloc0(range->bitmap.size); +- if (!range->bitmap.data) { +- ret = -ENOMEM; +- goto err_out; +- } ++ ++ range->bitmap.data = (__u64 *)qrange->bitmap; + + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); + if (ret) { +@@ -1027,7 +1077,6 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size, + range->bitmap.size, ram_addr); + err_out: +- g_free(range->bitmap.data); + g_free(dbitmap); + + return ret; +@@ -1681,6 +1730,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->dirty_pages_supported = false; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->hostwin_list); ++ QLIST_INIT(&container->dma_list); + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 475aa9fb40..2853dc861e 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -76,6 +76,14 @@ typedef struct VFIOAddressSpace { + + struct VFIOGroup; + ++typedef struct VFIODMARange { ++ QLIST_ENTRY(VFIODMARange) next; ++ hwaddr iova; ++ size_t size; ++ void *vaddr; /* unused */ ++ unsigned long *bitmap; /* dirty bitmap cache for this range */ ++} VFIODMARange; ++ + typedef struct VFIOContainer { + VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ +@@ -91,6 +99,7 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; ++ QLIST_HEAD(, VFIODMARange) dma_list; + QLIST_ENTRY(VFIOContainer) next; + } VFIOContainer; + +-- +2.27.0 + diff --git a/vfio-Make-migration-support-experimental.patch b/vfio-Make-migration-support-experimental.patch new file mode 100644 index 
0000000000000000000000000000000000000000..3bf32ecaf443b40929932743cd3d9f3b951011b2 --- /dev/null +++ b/vfio-Make-migration-support-experimental.patch @@ -0,0 +1,72 @@ +From d0a8ba1957743c55547ec2ccd8cb09b84a3354d2 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Mon, 9 Nov 2020 11:56:02 -0700 +Subject: [PATCH] vfio: Make migration support experimental + +Support for migration of vfio devices is still in flux. Developers +are attempting to add support for new devices and new architectures, +but none are yet readily available for validation. We have concerns +whether we're transferring device resources at the right point in the +migration, whether we're guaranteeing that updates during pre-copy are +migrated, and whether we can provide bit-stream compatibility should +any of this change. Even the question of whether devices should +participate in dirty page tracking during pre-copy seems contentious. +In short, migration support has not had enough soak time and it feels +premature to mark it as supported. + +Create an experimental option such that we can continue to develop. + +[Retaining previous acks/reviews for a previously identical code + change with different specifics in the commit log.] + +Reviewed-by: Dr. 
David Alan Gilbert +Acked-by: Cornelia Huck +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/migration.c | 2 +- + hw/vfio/pci.c | 2 ++ + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 8546075706..033cb2b0c9 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -888,7 +888,7 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + Error *local_err = NULL; + int ret = -ENOTSUP; + +- if (!container->dirty_pages_supported) { ++ if (!vbasedev->enable_migration || !container->dirty_pages_supported) { + goto add_blocker; + } + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index da7c740bce..2795b8bd12 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3192,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = { + VFIO_FEATURE_ENABLE_REQ_BIT, true), + DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), ++ DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, ++ vbasedev.enable_migration, false), + DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), + DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, + vbasedev.balloon_allowed, false), +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 048731e81f..7398631d4c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -123,6 +123,7 @@ typedef struct VFIODevice { + bool needs_reset; + bool no_mmap; + bool balloon_allowed; ++ bool enable_migration; + VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; +-- +2.27.0 + diff --git a/vfio-Make-vfio-pci-device-migration-capable.patch b/vfio-Make-vfio-pci-device-migration-capable.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e87ec7d7820b8ee4be0de5d55039336c47988fd --- /dev/null +++ b/vfio-Make-vfio-pci-device-migration-capable.patch @@ -0,0 +1,73 @@ 
+From b20bf027d44809dd6c6376cf0b77e5c5b2057cba Mon Sep 17 00:00:00 2001 +From: Jens Freimann +Date: Tue, 29 Oct 2019 12:49:05 +0100 +Subject: [PATCH] vfio: Make vfio-pci device migration capable + +If the device is not a failover primary device, call +vfio_migration_probe() and vfio_migration_finalize() to enable +migration support for those devices that support it respectively to +tear it down again. +Removed migration blocker from VFIO PCI device specific structure and use +migration blocker from generic structure of VFIO device. + +Note: Since the current version don't add the failover feature for assigned +PCI devices, just remove the failover related code in the original patch for +simplicity. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +Signed-off-by: Shenming Lu +--- + hw/vfio/pci.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index b9fae3ad28..a637c35e7a 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3049,6 +3049,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + } + } + ++ ret = vfio_migration_probe(&vdev->vbasedev, errp); ++ if (ret) { ++ error_report("%s: Migration disabled", vdev->vbasedev.name); ++ } ++ + vfio_register_err_notifier(vdev); + vfio_register_req_notifier(vdev); + vfio_setup_resetfn_quirk(vdev); +@@ -3096,6 +3101,7 @@ static void vfio_exitfn(PCIDevice *pdev) + } + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); ++ vfio_migration_finalize(&vdev->vbasedev); + } + + static void vfio_pci_reset(DeviceState *dev) +@@ -3204,11 +3210,6 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + +-static const VMStateDescription vfio_pci_vmstate = { +- .name = "vfio-pci", +- .unmigratable = 1, +-}; +- + static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -3216,7 
+3217,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + + dc->reset = vfio_pci_reset; + dc->props = vfio_pci_dev_properties; +- dc->vmsd = &vfio_pci_vmstate; + dc->desc = "VFIO-based PCI device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pdc->realize = vfio_realize; +-- +2.27.0 + diff --git a/vfio-Move-the-saving-of-the-config-space-to-the-righ.patch b/vfio-Move-the-saving-of-the-config-space-to-the-righ.patch new file mode 100644 index 0000000000000000000000000000000000000000..438c426803c4f77ea21220e6917ecf27ab566857 --- /dev/null +++ b/vfio-Move-the-saving-of-the-config-space-to-the-righ.patch @@ -0,0 +1,86 @@ +From 483baf4c668fbd2da76e6948576e13eded1c54ec Mon Sep 17 00:00:00 2001 +From: Shenming Lu +Date: Wed, 10 Mar 2021 11:02:31 +0800 +Subject: [PATCH] vfio: Move the saving of the config space to the right place + in VFIO migration + +On ARM64 the VFIO SET_IRQS ioctl is dependent on the VM interrupt +setup, if the restoring of the VFIO PCI device config space is +before the VGIC, an error might occur in the kernel. + +So we move the saving of the config space to the non-iterable +process, thus it will be called after the VGIC according to +their priorities. + +As for the possible dependence of the device specific migration +data on it's config space, we can let the vendor driver to +include any config info it needs in its own data stream. 
+ +Signed-off-by: Shenming Lu +Reviewed-by: Kirti Wankhede +Message-Id: <20210310030233.1133-2-lushenming@huawei.com> +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index b77c66557e..ea36ae5225 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -575,11 +575,6 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + return ret; + } + +- ret = vfio_save_device_config_state(f, opaque); +- if (ret) { +- return ret; +- } +- + ret = vfio_update_pending(vbasedev); + if (ret) { + return ret; +@@ -620,6 +615,19 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + return ret; + } + ++static void vfio_save_state(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ int ret; ++ ++ ret = vfio_save_device_config_state(f, opaque); ++ if (ret) { ++ error_report("%s: Failed to save device config space", ++ vbasedev->name); ++ qemu_file_set_error(f, ret); ++ } ++} ++ + static int vfio_load_setup(QEMUFile *f, void *opaque) + { + VFIODevice *vbasedev = opaque; +@@ -670,11 +678,7 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + switch (data) { + case VFIO_MIG_FLAG_DEV_CONFIG_STATE: + { +- ret = vfio_load_device_config_state(f, opaque); +- if (ret) { +- return ret; +- } +- break; ++ return vfio_load_device_config_state(f, opaque); + } + case VFIO_MIG_FLAG_DEV_SETUP_STATE: + { +@@ -720,6 +724,7 @@ static SaveVMHandlers savevm_vfio_handlers = { + .save_live_pending = vfio_save_pending, + .save_live_iterate = vfio_save_iterate, + .save_live_complete_precopy = vfio_save_complete_precopy, ++ .save_state = vfio_save_state, + .load_setup = vfio_load_setup, + .load_cleanup = vfio_load_cleanup, + .load_state = vfio_load_state, +-- +2.27.0 + diff --git a/vfio-Pass-stage-1-MSI-bindings-to-the-host.patch b/vfio-Pass-stage-1-MSI-bindings-to-the-host.patch new file 
mode 100644 index 0000000000000000000000000000000000000000..1ad94b06ad73ccf44d049daa2b8ff35b3624d539 --- /dev/null +++ b/vfio-Pass-stage-1-MSI-bindings-to-the-host.patch @@ -0,0 +1,262 @@ +From 1729ae16dc557c0ad54cab3096b5cb6649d181ae Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 14 Aug 2018 08:08:11 -0400 +Subject: [PATCH] vfio: Pass stage 1 MSI bindings to the host + +We register the stage1 MSI bindings when enabling the vectors +and we unregister them on msi disable. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 59 +++++++++++++++++++++++++++ + hw/vfio/pci.c | 76 ++++++++++++++++++++++++++++++++++- + hw/vfio/trace-events | 2 + + include/hw/vfio/vfio-common.h | 12 ++++++ + 4 files changed, 147 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index cc50efdbc1..db9af3b0e5 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -709,6 +709,65 @@ static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + } + } + ++int vfio_iommu_set_msi_binding(VFIOContainer *container, int n, ++ IOMMUTLBEntry *iotlb) ++{ ++ struct vfio_iommu_type1_set_msi_binding ustruct; ++ VFIOMSIBinding *binding; ++ int ret; ++ ++ QLIST_FOREACH(binding, &container->msibinding_list, next) { ++ if (binding->index == n) { ++ return 0; ++ } ++ } ++ ++ ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding); ++ ustruct.iova = iotlb->iova; ++ ustruct.flags = VFIO_IOMMU_BIND_MSI; ++ ustruct.gpa = iotlb->translated_addr; ++ ustruct.size = iotlb->addr_mask + 1; ++ ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct); ++ if (ret) { ++ error_report("%s: failed to register the stage1 MSI binding (%m)", ++ __func__); ++ return ret; ++ } ++ binding = g_new0(VFIOMSIBinding, 1); ++ binding->iova = ustruct.iova; ++ binding->gpa = ustruct.gpa; ++ binding->size = ustruct.size; ++ binding->index = n; ++ ++ QLIST_INSERT_HEAD(&container->msibinding_list, binding, next); ++ return 0; ++} ++ ++int 
vfio_iommu_unset_msi_binding(VFIOContainer *container, int n) ++{ ++ struct vfio_iommu_type1_set_msi_binding ustruct; ++ VFIOMSIBinding *binding, *tmp; ++ int ret; ++ ++ ustruct.argsz = sizeof(struct vfio_iommu_type1_set_msi_binding); ++ QLIST_FOREACH_SAFE(binding, &container->msibinding_list, next, tmp) { ++ if (binding->index != n) { ++ continue; ++ } ++ ustruct.flags = VFIO_IOMMU_UNBIND_MSI; ++ ustruct.iova = binding->iova; ++ ret = ioctl(container->fd, VFIO_IOMMU_SET_MSI_BINDING , &ustruct); ++ if (ret) { ++ error_report("Failed to unregister the stage1 MSI binding " ++ "for iova=0x%"PRIx64" (%m)", binding->iova); ++ } ++ QLIST_REMOVE(binding, next); ++ g_free(binding); ++ return ret; ++ } ++ return 0; ++} ++ + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 6c90ec9278..bbcba3fd16 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -360,6 +360,65 @@ static void vfio_msi_interrupt(void *opaque) + notify(&vdev->pdev, nr); + } + ++static bool vfio_iommu_require_msi_binding(IOMMUMemoryRegion *iommu_mr) ++{ ++ bool msi_translate = false, nested = false; ++ ++ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_MSI_TRANSLATE, ++ (void *)&msi_translate); ++ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED, ++ (void *)&nested); ++ if (!nested || !msi_translate) { ++ return false; ++ } ++ return true; ++} ++ ++static int vfio_register_msi_binding(VFIOPCIDevice *vdev, ++ int vector_n, bool set) ++{ ++ VFIOContainer *container = vdev->vbasedev.group->container; ++ PCIDevice *dev = &vdev->pdev; ++ AddressSpace *as = pci_device_iommu_address_space(dev); ++ IOMMUMemoryRegionClass *imrc; ++ IOMMUMemoryRegion *iommu_mr; ++ IOMMUTLBEntry entry; ++ MSIMessage msg; ++ ++ if (as == &address_space_memory) { ++ return 0; ++ } ++ ++ iommu_mr = IOMMU_MEMORY_REGION(as->root); ++ if (!vfio_iommu_require_msi_binding(iommu_mr)) { ++ 
return 0; ++ } ++ ++ /* MSI doorbell address is translated by an IOMMU */ ++ ++ if (!set) { /* unregister */ ++ trace_vfio_unregister_msi_binding(vdev->vbasedev.name, vector_n); ++ ++ return vfio_iommu_unset_msi_binding(container, vector_n); ++ } ++ ++ msg = pci_get_msi_message(dev, vector_n); ++ imrc = memory_region_get_iommu_class_nocheck(iommu_mr); ++ ++ rcu_read_lock(); ++ entry = imrc->translate(iommu_mr, msg.address, IOMMU_WO, 0); ++ rcu_read_unlock(); ++ ++ if (entry.perm == IOMMU_NONE) { ++ return -ENOENT; ++ } ++ ++ trace_vfio_register_msi_binding(vdev->vbasedev.name, vector_n, ++ msg.address, entry.translated_addr); ++ ++ return vfio_iommu_set_msi_binding(container, vector_n, &entry); ++} ++ + static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) + { + struct vfio_irq_set *irq_set; +@@ -377,7 +436,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) + fds = (int32_t *)&irq_set->data; + + for (i = 0; i < vdev->nr_vectors; i++) { +- int fd = -1; ++ int ret, fd = -1; + + /* + * MSI vs MSI-X - The guest has direct access to MSI mask and pending +@@ -386,6 +445,12 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) + * KVM signaling path only when configured and unmasked. 
+ */ + if (vdev->msi_vectors[i].use) { ++ ret = vfio_register_msi_binding(vdev, i, true); ++ if (ret) { ++ error_report("%s failed to register S1 MSI binding " ++ "for vector %d(%d)", vdev->vbasedev.name, i, ret); ++ goto out; ++ } + if (vdev->msi_vectors[i].virq < 0 || + (msix && msix_is_masked(&vdev->pdev, i))) { + fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt); +@@ -399,6 +464,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) + + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); + ++out: + g_free(irq_set); + + return ret; +@@ -712,7 +778,8 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) + + static void vfio_msix_disable(VFIOPCIDevice *vdev) + { +- int i; ++ int ret, i; ++ + + msix_unset_vector_notifiers(&vdev->pdev); + +@@ -724,6 +791,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) + if (vdev->msi_vectors[i].use) { + vfio_msix_vector_release(&vdev->pdev, i); + msix_vector_unuse(&vdev->pdev, i); ++ ret = vfio_register_msi_binding(vdev, i, false); ++ if (ret) { ++ error_report("%s: failed to unregister S1 MSI binding " ++ "for vector %d(%d)", vdev->vbasedev.name, i, ret); ++ } + } + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index ee9a67d3ef..247b72c1eb 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -120,6 +120,8 @@ vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype + vfio_dma_unmap_overflow_workaround(void) "" + vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d" + vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d" ++vfio_register_msi_binding(const char *name, int vector, uint64_t giova, uint64_t gdb) "%s: register vector %d gIOVA=0x%"PRIx64 "-> gDB=0x%"PRIx64" stage 1 mapping" ++vfio_unregister_msi_binding(const char *name, int vector) "%s: unregister vector 
%d stage 1 mapping" + + # platform.c + vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 1277914ca8..b175158138 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -74,6 +74,14 @@ typedef struct VFIOAddressSpace { + QLIST_ENTRY(VFIOAddressSpace) list; + } VFIOAddressSpace; + ++typedef struct VFIOMSIBinding { ++ int index; ++ hwaddr iova; ++ hwaddr gpa; ++ hwaddr size; ++ QLIST_ENTRY(VFIOMSIBinding) next; ++} VFIOMSIBinding; ++ + struct VFIOGroup; + + typedef struct VFIODMARange { +@@ -101,6 +109,7 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIODMARange) dma_list; ++ QLIST_HEAD(, VFIOMSIBinding) msibinding_list; + QLIST_ENTRY(VFIOContainer) next; + } VFIOContainer; + +@@ -210,6 +219,9 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); + void vfio_put_group(VFIOGroup *group); + int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp); ++int vfio_iommu_set_msi_binding(VFIOContainer *container, int n, ++ IOMMUTLBEntry *entry); ++int vfio_iommu_unset_msi_binding(VFIOContainer *container, int n); + + extern const MemoryRegionOps vfio_region_ops; + typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; +-- +2.27.0 + diff --git a/vfio-Register-SaveVMHandlers-for-VFIO-device.patch b/vfio-Register-SaveVMHandlers-for-VFIO-device.patch new file mode 100644 index 0000000000000000000000000000000000000000..8e12cd2bb2bbfede3be871fb31a3a96562fedc15 --- /dev/null +++ b/vfio-Register-SaveVMHandlers-for-VFIO-device.patch @@ -0,0 +1,183 @@ +From cd5b58f2ba20e59f2c29d955b8bbd7f5016030b7 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:17 +0530 +Subject: [PATCH] vfio: Register SaveVMHandlers for VFIO device + +Define flags to be used as delimiter in migration 
stream for VFIO devices. +Added .save_setup and .save_cleanup functions. Map & unmap migration +region from these functions at source during saving or pre-copy phase. + +Set VFIO device state depending on VM's state. During live migration, VM is +running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO +device. During save-restore, VM is paused, _SAVING state is set for VFIO device. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Cornelia Huck +Reviewed-by: Yan Zhao +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 102 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 2 + + 2 files changed, 104 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 0c6c9b655f..405228fc5a 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -8,12 +8,15 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/main-loop.h" ++#include "qemu/cutils.h" + #include + + #include "sysemu/sysemu.h" + #include "hw/vfio/vfio-common.h" + #include "cpu.h" + #include "migration/migration.h" ++#include "migration/vmstate.h" + #include "migration/qemu-file.h" + #include "migration/register.h" + #include "migration/blocker.h" +@@ -25,6 +28,22 @@ + #include "trace.h" + #include "hw/hw.h" + ++/* ++ * Flags to be used as unique delimiters for VFIO devices in the migration ++ * stream. These flags are composed as: ++ * 0xffffffff => MSB 32-bit all 1s ++ * 0xef10 => Magic ID, represents emulated (virtual) function IO ++ * 0x0000 => 16-bits reserved for flags ++ * ++ * The beginning of state information is marked by _DEV_CONFIG_STATE, ++ * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a ++ * certain state information is marked by _END_OF_STATE. 
++ */ ++#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) ++#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) ++#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) ++#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) ++ + static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, + off_t off, bool iswrite) + { +@@ -129,6 +148,75 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, + return 0; + } + ++static void vfio_migration_cleanup(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (migration->region.mmaps) { ++ vfio_region_unmap(&migration->region); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int vfio_save_setup(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ int ret; ++ ++ trace_vfio_save_setup(vbasedev->name); ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); ++ ++ if (migration->region.mmaps) { ++ /* ++ * Calling vfio_region_mmap() from migration thread. Memory API called ++ * from this function require locking the iothread when called from ++ * outside the main loop thread. 
++ */ ++ qemu_mutex_lock_iothread(); ++ ret = vfio_region_mmap(&migration->region); ++ qemu_mutex_unlock_iothread(); ++ if (ret) { ++ error_report("%s: Failed to mmap VFIO migration region: %s", ++ vbasedev->name, strerror(-ret)); ++ error_report("%s: Falling back to slow path", vbasedev->name); ++ } ++ } ++ ++ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, ++ VFIO_DEVICE_STATE_SAVING); ++ if (ret) { ++ error_report("%s: Failed to set state SAVING", vbasedev->name); ++ return ret; ++ } ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void vfio_save_cleanup(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ vfio_migration_cleanup(vbasedev); ++ trace_vfio_save_cleanup(vbasedev->name); ++} ++ ++static SaveVMHandlers savevm_vfio_handlers = { ++ .save_setup = vfio_save_setup, ++ .save_cleanup = vfio_save_cleanup, ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ + static void vfio_vmstate_change(void *opaque, int running, RunState state) + { + VFIODevice *vbasedev = opaque; +@@ -215,6 +303,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, + int ret; + Object *obj; + VFIOMigration *migration; ++ char id[256] = ""; ++ g_autofree char *path = NULL, *oid = NULL; + + if (!vbasedev->ops->vfio_get_object) { + return -EINVAL; +@@ -244,6 +334,18 @@ static int vfio_migration_init(VFIODevice *vbasedev, + + migration = vbasedev->migration; + migration->vbasedev = vbasedev; ++ ++ oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); ++ if (oid) { ++ path = g_strdup_printf("%s/vfio", oid); ++ } else { ++ path = g_strdup("vfio"); ++ } ++ strpadcpy(id, sizeof(id), path, '\0'); ++ ++ register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, ++ vbasedev); ++ + migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, + vbasedev); + migration->migration_state.notify = 
vfio_migration_state_notifier; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index bd3d47b005..86c18def01 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -149,3 +149,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" + vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" + vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" + vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" ++vfio_save_setup(const char *name) " (%s)" ++vfio_save_cleanup(const char *name) " (%s)" +-- +2.27.0 + diff --git a/vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch b/vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch new file mode 100644 index 0000000000000000000000000000000000000000..d2138b57e71e494198068253be7d7229b510e598 --- /dev/null +++ b/vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch @@ -0,0 +1,41 @@ +From b9d74bcf6aefe8ab607439ad1c518a453053ccee Mon Sep 17 00:00:00 2001 +From: Shenming Lu +Date: Wed, 10 Mar 2021 11:02:32 +0800 +Subject: [PATCH] vfio: Set the priority of the VFIO VM state change handler + explicitly + +In the VFIO VM state change handler when stopping the VM, the _RUNNING +bit in device_state is cleared which makes the VFIO device stop, including +no longer generating interrupts. Then we can save the pending states of +all interrupts in the GIC VM state change handler (on ARM). + +So we have to set the priority of the VFIO VM state change handler +explicitly (like virtio devices) to ensure it is called before the +GIC's in saving. 
+ +Signed-off-by: Shenming Lu +Reviewed-by: Kirti Wankhede +Reviewed-by: Cornelia Huck +Message-Id: <20210310030233.1133-3-lushenming@huawei.com> +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index ea36ae5225..1a97784486 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -862,7 +862,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, + vbasedev); + +- migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, ++ migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev, ++ vfio_vmstate_change, + vbasedev); + migration->migration_state.notify = vfio_migration_state_notifier; + add_migration_state_change_notifier(&migration->migration_state); +-- +2.27.0 + diff --git a/vfio-Set-up-nested-stage-mappings.patch b/vfio-Set-up-nested-stage-mappings.patch new file mode 100644 index 0000000000000000000000000000000000000000..66659e81a8dacc392f4f6a56ab988c36a52af8a2 --- /dev/null +++ b/vfio-Set-up-nested-stage-mappings.patch @@ -0,0 +1,277 @@ +From a65c40f9d1025a9843dec38070d9f26792b00892 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 29 Aug 2018 18:10:12 +0200 +Subject: [PATCH] vfio: Set up nested stage mappings + +In nested mode, legacy vfio_iommu_map_notify cannot be used as +there is no "caching" mode and we do not trap on map. + +On Intel, vfio_iommu_map_notify was used to DMA map the RAM +through the host single stage. + +With nested mode, we need to setup the stage 2 and the stage 1 +separately. This patch introduces a prereg_listener to setup +the stage 2 mapping. + +The stage 1 mapping, owned by the guest, is passed to the host +when the guest invalidates the stage 1 configuration, through +a dedicated PCIPASIDOps callback. 
Guest IOTLB invalidations +are cascaded downto the host through another IOMMU MR UNMAP +notifier. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 136 +++++++++++++++++++++++++++++++++++++++++-- + hw/vfio/pci.c | 21 +++++++ + hw/vfio/trace-events | 2 + + 3 files changed, 154 insertions(+), 5 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 8837d33c57..cc50efdbc1 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -642,6 +642,73 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + return true; + } + ++/* Propagate a guest IOTLB invalidation to the host (nested mode) */ ++static void vfio_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) ++{ ++ VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); ++ struct vfio_iommu_type1_cache_invalidate ustruct = {}; ++ VFIOContainer *container = giommu->container; ++ int ret; ++ ++ assert(iotlb->perm == IOMMU_NONE); ++ ++ ustruct.argsz = sizeof(ustruct); ++ ustruct.flags = 0; ++ ustruct.info.argsz = sizeof(struct iommu_cache_invalidate_info); ++ ustruct.info.version = IOMMU_CACHE_INVALIDATE_INFO_VERSION_1; ++ ustruct.info.cache = IOMMU_CACHE_INV_TYPE_IOTLB; ++ ++ switch (iotlb->granularity) { ++ case IOMMU_INV_GRAN_DOMAIN: ++ ustruct.info.granularity = IOMMU_INV_GRANU_DOMAIN; ++ break; ++ case IOMMU_INV_GRAN_PASID: ++ { ++ struct iommu_inv_pasid_info *pasid_info; ++ int archid = -1; ++ ++ pasid_info = &ustruct.info.granu.pasid_info; ++ ustruct.info.granularity = IOMMU_INV_GRANU_PASID; ++ if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) { ++ pasid_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID; ++ archid = iotlb->arch_id; ++ } ++ pasid_info->archid = archid; ++ trace_vfio_iommu_asid_inv_iotlb(archid); ++ break; ++ } ++ case IOMMU_INV_GRAN_ADDR: ++ { ++ hwaddr start = iotlb->iova + giommu->iommu_offset; ++ struct iommu_inv_addr_info *addr_info; ++ size_t size = iotlb->addr_mask + 1; ++ int archid = -1; ++ ++ addr_info = 
&ustruct.info.granu.addr_info; ++ ustruct.info.granularity = IOMMU_INV_GRANU_ADDR; ++ if (iotlb->leaf) { ++ addr_info->flags |= IOMMU_INV_ADDR_FLAGS_LEAF; ++ } ++ if (iotlb->flags & IOMMU_INV_FLAGS_ARCHID) { ++ addr_info->flags |= IOMMU_INV_ADDR_FLAGS_ARCHID; ++ archid = iotlb->arch_id; ++ } ++ addr_info->archid = archid; ++ addr_info->addr = start; ++ addr_info->granule_size = size; ++ addr_info->nb_granules = 1; ++ trace_vfio_iommu_addr_inv_iotlb(archid, start, size, ++ 1, iotlb->leaf); ++ break; ++ } ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_CACHE_INVALIDATE, &ustruct); ++ if (ret) { ++ error_report("%p: failed to invalidate CACHE (%d)", container, ret); ++ } ++} ++ + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +@@ -823,6 +890,32 @@ static void vfio_dma_unmap_ram_section(VFIOContainer *container, + } + } + ++static void vfio_prereg_listener_region_add(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = ++ container_of(listener, VFIOContainer, prereg_listener); ++ ++ if (!memory_region_is_ram(section->mr)) { ++ return; ++ } ++ ++ vfio_dma_map_ram_section(container, section); ++} ++ ++static void vfio_prereg_listener_region_del(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = ++ container_of(listener, VFIOContainer, prereg_listener); ++ ++ if (!memory_region_is_ram(section->mr)) { ++ return; ++ } ++ ++ vfio_dma_unmap_ram_section(container, section); ++} ++ + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +@@ -920,9 +1013,10 @@ static void vfio_listener_region_add(MemoryListener *listener, + memory_region_ref(section->mr); + + if (memory_region_is_iommu(section->mr)) { ++ IOMMUNotify notify; + VFIOGuestIOMMU *giommu; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); +- int iommu_idx; ++ int iommu_idx, flags; + + 
trace_vfio_listener_region_add_iommu(iova, end); + /* +@@ -941,15 +1035,27 @@ static void vfio_listener_region_add(MemoryListener *listener, + llend = int128_sub(llend, int128_one()); + iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, + MEMTXATTRS_UNSPECIFIED); +- iommu_notifier_init(&giommu->n, vfio_iommu_map_notify, +- IOMMU_NOTIFIER_ALL, ++ ++ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { ++ /* IOTLB unmap notifier to propagate guest IOTLB invalidations */ ++ flags = IOMMU_NOTIFIER_UNMAP; ++ notify = vfio_iommu_unmap_notify; ++ } else { ++ /* MAP/UNMAP IOTLB notifier */ ++ flags = IOMMU_NOTIFIER_ALL; ++ notify = vfio_iommu_map_notify; ++ } ++ ++ iommu_notifier_init(&giommu->n, notify, flags, + section->offset_within_region, + int128_get64(llend), + iommu_idx); + QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); + + memory_region_register_iommu_notifier(section->mr, &giommu->n); +- memory_region_iommu_replay(giommu->iommu, &giommu->n); ++ if (flags & IOMMU_NOTIFIER_MAP) { ++ memory_region_iommu_replay(giommu->iommu, &giommu->n); ++ } + + return; + } +@@ -1367,10 +1473,16 @@ static const MemoryListener vfio_memory_listener = { + .log_clear = vfio_listener_log_clear, + }; + ++static MemoryListener vfio_memory_prereg_listener = { ++ .region_add = vfio_prereg_listener_region_add, ++ .region_del = vfio_prereg_listener_region_del, ++}; ++ + static void vfio_listener_release(VFIOContainer *container) + { + memory_listener_unregister(&container->listener); +- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { ++ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || ++ container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { + memory_listener_unregister(&container->prereg_listener); + } + } +@@ -1976,6 +2088,20 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + vfio_get_iommu_info_migration(container, info); + } + g_free(info); ++ ++ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { ++ 
container->prereg_listener = vfio_memory_prereg_listener; ++ memory_listener_register(&container->prereg_listener, ++ &address_space_memory); ++ if (container->error) { ++ memory_listener_unregister(&container->prereg_listener); ++ ret = container->error; ++ error_setg(errp, ++ "RAM memory listener initialization failed " ++ "for container"); ++ goto free_container_exit; ++ } ++ } + break; + } + case VFIO_SPAPR_TCE_v2_IOMMU: +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 3641ad0c5c..6c90ec9278 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2766,6 +2766,25 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) + vdev->req_enabled = false; + } + ++static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn, ++ IOMMUConfig *config) ++{ ++ PCIDevice *pdev = bus->devices[devfn]; ++ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); ++ VFIOContainer *container = vdev->vbasedev.group->container; ++ struct vfio_iommu_type1_set_pasid_table info; ++ ++ info.argsz = sizeof(info); ++ info.flags = VFIO_PASID_TABLE_FLAG_SET; ++ memcpy(&info.config, &config->pasid_cfg, sizeof(config->pasid_cfg)); ++ ++ return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info); ++} ++ ++static PCIPASIDOps vfio_pci_pasid_ops = { ++ .set_pasid_table = vfio_iommu_set_pasid_table, ++}; ++ + static void vfio_realize(PCIDevice *pdev, Error **errp) + { + VFIOPCIDevice *vdev = PCI_VFIO(pdev); +@@ -3072,6 +3091,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + vfio_register_req_notifier(vdev); + vfio_setup_resetfn_quirk(vdev); + ++ pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops); ++ + return; + + out_teardown: +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 9b6c7ca61b..ee9a67d3ef 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -118,6 +118,8 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d 
[0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" + vfio_dma_unmap_overflow_workaround(void) "" ++vfio_iommu_addr_inv_iotlb(int asid, uint64_t addr, uint64_t size, uint64_t nb_granules, bool leaf) "nested IOTLB invalidate asid=%d, addr=0x%"PRIx64" granule_size=0x%"PRIx64" nb_granules=0x%"PRIx64" leaf=%d" ++vfio_iommu_asid_inv_iotlb(int asid) "nested IOTLB invalidate asid=%d" + + # platform.c + vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group #%d" +-- +2.27.0 + diff --git a/vfio-Support-host-translation-granule-size.patch b/vfio-Support-host-translation-granule-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..d5eab65155770160c38615d038ea66264e284acb --- /dev/null +++ b/vfio-Support-host-translation-granule-size.patch @@ -0,0 +1,152 @@ +From 594cba5943b3e8bf1bd5720b1fa20d4662920ae0 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Thu, 4 Mar 2021 21:34:46 +0800 +Subject: [PATCH] vfio: Support host translation granule size + +The cpu_physical_memory_set_dirty_lebitmap() can quickly deal with +the dirty pages of memory by bitmap-traveling, regardless of whether +the bitmap is aligned correctly or not. + +cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of +host page size. So it'd better to set bitmap_pgsize to host page size +to support more translation granule sizes. + +[aw: The Fixes commit below introduced code to restrict migration +support to configurations where the target page size intersects the +host dirty page support. For example, a 4K guest on a 4K host. +Due to the above flexibility in bitmap handling, this restriction +unnecessarily prevents mixed target/host pages size that could +otherwise be supported. Use host page size for dirty bitmap.] 
+ +Fixes: fc49c9cbf2 ("vfio: Get migration capability flags for container") +Signed-off-by: Kunkun Jiang +Message-Id: <20210304133446.1521-1-jiangkunkun@huawei.com> +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 48 +++++++++++++++++++++++++----------------------- + 1 file changed, 25 insertions(+), 23 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index ebd701faa0..a7817c90cc 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -377,7 +377,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, + { + struct vfio_iommu_type1_dma_unmap *unmap; + struct vfio_bitmap *bitmap; +- uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS; ++ uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size; + int ret; + + unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); +@@ -389,12 +389,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, + bitmap = (struct vfio_bitmap *)&unmap->data; + + /* +- * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of +- * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to +- * TARGET_PAGE_SIZE. ++ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of ++ * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize ++ * to qemu_real_host_page_size. 
+ */ + +- bitmap->pgsize = TARGET_PAGE_SIZE; ++ bitmap->pgsize = qemu_real_host_page_size; + bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / + BITS_PER_BYTE; + +@@ -672,16 +672,17 @@ static void vfio_listener_region_add(MemoryListener *listener, + return; + } + +- if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != +- (section->offset_within_region & ~TARGET_PAGE_MASK))) { ++ if (unlikely((section->offset_within_address_space & ++ ~qemu_real_host_page_mask) != ++ (section->offset_within_region & ~qemu_real_host_page_mask))) { + error_report("%s received unaligned region", __func__); + return; + } + +- iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); ++ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); + llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); +- llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); ++ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); + + if (int128_ge(int128_make64(iova), llend)) { + return; +@@ -866,8 +867,9 @@ static void vfio_listener_region_del(MemoryListener *listener, + return; + } + +- if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != +- (section->offset_within_region & ~TARGET_PAGE_MASK))) { ++ if (unlikely((section->offset_within_address_space & ++ ~qemu_real_host_page_mask) != ++ (section->offset_within_region & ~qemu_real_host_page_mask))) { + error_report("%s received unaligned region", __func__); + return; + } +@@ -895,10 +897,10 @@ static void vfio_listener_region_del(MemoryListener *listener, + */ + } + +- iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); ++ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); + llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); +- llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); ++ llend = int128_and(llend, 
int128_exts64(qemu_real_host_page_mask)); + + if (int128_ge(int128_make64(iova), llend)) { + return; +@@ -967,13 +969,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + range->size = size; + + /* +- * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of +- * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to +- * TARGET_PAGE_SIZE. ++ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of ++ * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize ++ * to qemu_real_host_page_size. + */ +- range->bitmap.pgsize = TARGET_PAGE_SIZE; ++ range->bitmap.pgsize = qemu_real_host_page_size; + +- pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS; ++ pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size; + range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / + BITS_PER_BYTE; + range->bitmap.data = g_try_malloc0(range->bitmap.size); +@@ -1077,8 +1079,8 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + section->offset_within_region; + + return vfio_get_dirty_bitmap(container, +- TARGET_PAGE_ALIGN(section->offset_within_address_space), +- int128_get64(section->size), ram_addr); ++ REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), ++ int128_get64(section->size), ram_addr); + } + + static void vfio_listener_log_sync(MemoryListener *listener, +@@ -1572,10 +1574,10 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + header); + + /* +- * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of +- * TARGET_PAGE_SIZE to mark those dirty. ++ * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of ++ * qemu_real_host_page_size to mark those dirty. 
+ */ +- if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) { ++ if (cap_mig->pgsize_bitmap & qemu_real_host_page_size) { + container->dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; +-- +2.27.0 + diff --git a/vfio-add-quirk-device-write-method.patch b/vfio-add-quirk-device-write-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7e2c99dc212605291627dac4dee0512e1b34f86 --- /dev/null +++ b/vfio-add-quirk-device-write-method.patch @@ -0,0 +1,40 @@ +From 95ee5273e25ed606aa86f8a154c06887efc20494 Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Thu, 25 Mar 2021 17:12:57 +0800 +Subject: [PATCH] vfio: add quirk device write method + +--- + hw/vfio/pci-quirks.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index b35a640030..9ce790bdd2 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -12,6 +12,7 @@ + + #include "qemu/osdep.h" + #include "qemu/units.h" ++#include "qemu/log.h" + #include "qemu/error-report.h" + #include "qemu/main-loop.h" + #include "qemu/module.h" +@@ -275,8 +276,15 @@ static uint64_t vfio_ati_3c3_quirk_read(void *opaque, + return data; + } + ++static void vfio_ati_3c3_quirk_write(void *opaque, hwaddr addr, ++ uint64_t data, unsigned size) ++{ ++ qemu_log_mask(LOG_GUEST_ERROR, "%s not implemented\n", __func__); ++} ++ + static const MemoryRegionOps vfio_ati_3c3_quirk = { + .read = vfio_ati_3c3_quirk_read, ++ .write = vfio_ati_3c3_quirk_write, + .endianness = DEVICE_LITTLE_ENDIAN, + }; + +-- +2.27.0 + diff --git a/vfio-common-Add-address-alignment-check-in-vfio_list.patch b/vfio-common-Add-address-alignment-check-in-vfio_list.patch new file mode 100644 index 0000000000000000000000000000000000000000..0aacdcd02d3deecc95da91603e06ec7decc924d1 --- /dev/null +++ b/vfio-common-Add-address-alignment-check-in-vfio_list.patch @@ -0,0 +1,53 @@ +From 
0a6ee00461c784ef547b8f071ad147fcb89875b6 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Tue, 14 Sep 2021 14:21:46 +0800 +Subject: [PATCH] vfio/common: Add address alignment check in + vfio_listener_region_del + +Both vfio_listener_region_add and vfio_listener_region_del have +reference counting operations on ram section->mr. If the 'iova' +and 'llend' of the ram section do not pass the alignment +check, the ram section should not be mapped or unmapped. It means +that the reference counting should not be changed. + +However, the address alignment check is missing in +vfio_listener_region_del. This makes memory_region_unref will +be unconditional called and causes unintended problems in some +scenarios. + +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index de166dd5f9..6d6a4c6dee 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1149,6 +1149,8 @@ static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ hwaddr iova; ++ Int128 llend; + + if (vfio_listener_skipped_section(section)) { + trace_vfio_listener_region_del_skip( +@@ -1198,6 +1200,14 @@ static void vfio_listener_region_del(MemoryListener *listener, + */ + } + ++ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); ++ llend = int128_make64(section->offset_within_address_space); ++ llend = int128_add(llend, section->size); ++ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); ++ if (int128_ge(int128_make64(iova), llend)) { ++ return; ++ } ++ + vfio_dma_unmap_ram_section(container, section); + + memory_region_unref(section->mr); +-- +2.27.0 + diff --git a/vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch b/vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch new file mode 100644 index 
0000000000000000000000000000000000000000..efcbd1fd03162efd34a1c11bc169e39da757da6b --- /dev/null +++ b/vfio-common-Avoid-unmap-ram-section-at-vfio_listener.patch @@ -0,0 +1,39 @@ +From 55f3bdd0866be2b1a6223bacf9e00a032daf957c Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Sat, 31 Jul 2021 10:02:18 +0800 +Subject: [PATCH] vfio/common: Avoid unmap ram section at + vfio_listener_region_del() in nested mode + +The ram section will be unmapped at vfio_prereg_listener_region_del() +in nested mode. So let's avoid unmap ram section at +vfio_listener_region_dev(). + +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 98dc9e6f84..21a866e545 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1179,6 +1179,16 @@ static void vfio_listener_region_del(MemoryListener *listener, + } + } + ++ /* ++ * In nested mode, stage 2 (gpa->hpa) and the stage 1 ++ * (giova->gpa) are set separately. The ram section ++ * will be unmapped in vfio_prereg_listener_region_del(). ++ * Hence it doesn't need to unmap ram section here. ++ */ ++ if (container->iommu_type == VFIO_TYPE1_NESTING_IOMMU) { ++ return; ++ } ++ + /* + * FIXME: We assume the one big unmap below is adequate to + * remove any individual page mappings in the IOMMU which +-- +2.27.0 + diff --git a/vfio-common-Fix-incorrect-address-alignment-in-vfio_.patch b/vfio-common-Fix-incorrect-address-alignment-in-vfio_.patch new file mode 100644 index 0000000000000000000000000000000000000000..e1b96e6d9547c6d4852fc079e702f5aff82822d3 --- /dev/null +++ b/vfio-common-Fix-incorrect-address-alignment-in-vfio_.patch @@ -0,0 +1,40 @@ +From 7438519f5cfb0e07dd54f242901761da87f1156c Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Tue, 7 Sep 2021 15:14:12 +0800 +Subject: [PATCH] vfio/common: Fix incorrect address alignment in + vfio_dma_map_ram_section + +The 'iova' will be passed to host kernel for mapping with the +HPA. 
It is related to the host page size. So TARGET_PAGE_ALIGN +should be replaced by REAL_HOST_PAGE_ALIGN. In the case of +large granularity (64K), it may return early when map MMIO RAM +section. And because of the inconsistency with +vfio_dma_unmap_ram_section, it may cause 'assert(qrange)' +in vfio_dma_unmap. + +Signed-off-by: Kunkun Jiang +Signed-off-by: Zenghui Yu +--- + hw/vfio/common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index fb7ca63748..de166dd5f9 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -846,10 +846,10 @@ static int vfio_dma_map_ram_section(VFIOContainer *container, + + assert(memory_region_is_ram(section->mr)); + +- iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); ++ iova = REAL_HOST_PAGE_ALIGN(section->offset_within_address_space); + llend = int128_make64(section->offset_within_address_space); + llend = int128_add(llend, section->size); +- llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); ++ llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask)); + end = int128_get64(int128_sub(llend, int128_one())); + + vaddr = memory_region_get_ram_ptr(section->mr) + +-- +2.27.0 + diff --git a/vfio-migrate-Move-switch-of-dirty-tracking-into-vfio.patch b/vfio-migrate-Move-switch-of-dirty-tracking-into-vfio.patch new file mode 100644 index 0000000000000000000000000000000000000000..5f543b40bdb7e93d671edbd834b4279dec69c8c9 --- /dev/null +++ b/vfio-migrate-Move-switch-of-dirty-tracking-into-vfio.patch @@ -0,0 +1,196 @@ +From 74b651428e6ed65177354d80bd888e842a4a5077 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 9 Mar 2021 11:19:13 +0800 +Subject: [PATCH] vfio/migrate: Move switch of dirty tracking into + vfio_memory_listener + +For now the switch of vfio dirty page tracking is integrated into +@vfio_save_handler. 
The reason is that some PCI vendor driver may +start to track dirty base on _SAVING state of device, so if dirty +tracking is started before setting device state, vfio will report +full-dirty to QEMU. + +However, the dirty bmap of all ramblocks are fully set when setup +ram saving, so it's not matter whether the device is in _SAVING +state when start vfio dirty tracking. + +Moreover, this logic causes some problems [1]. The object of dirty +tracking is guest memory, but the object of @vfio_save_handler is +device state, which produces unnecessary coupling and conflicts: + +1. Coupling: Their saving granule is different (perVM vs perDevice). + vfio will enable dirty_page_tracking for each devices, actually + once is enough. + +2. Conflicts: The ram_save_setup() traverses all memory_listeners + to execute their log_start() and log_sync() hooks to get the + first round dirty bitmap, which is used by the bulk stage of + ram saving. However, as vfio dirty tracking is not yet started, + it can't get dirty bitmap from vfio. Then we give up the chance + to handle vfio dirty page at bulk stage. + +Move the switch of vfio dirty_page_tracking into vfio_memory_listener +can solve above problems. Besides, Do not require devices in SAVING +state for vfio_sync_dirty_bitmap(). 
+ +[1] https://www.spinics.net/lists/kvm/msg229967.html + +Reported-by: Zenghui Yu +Signed-off-by: Keqian Zhu +Suggested-by: Paolo Bonzini +Message-Id: <20210309031913.11508-1-zhukeqian1@huawei.com> +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 49 ++++++++++++++++++++++++++++++++++++--------- + hw/vfio/migration.c | 35 -------------------------------- + 2 files changed, 40 insertions(+), 44 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a7817c90cc..245e32df5b 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -310,7 +310,7 @@ bool vfio_mig_active(void) + return true; + } + +-static bool vfio_devices_all_saving(VFIOContainer *container) ++static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + { + VFIOGroup *group; + VFIODevice *vbasedev; +@@ -328,13 +328,8 @@ static bool vfio_devices_all_saving(VFIOContainer *container) + return false; + } + +- if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { +- if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) +- && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { +- return false; +- } +- continue; +- } else { ++ if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) ++ && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { + return false; + } + } +@@ -952,6 +947,40 @@ static void vfio_listener_region_del(MemoryListener *listener, + } + } + ++static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) ++{ ++ int ret; ++ struct vfio_iommu_type1_dirty_bitmap dirty = { ++ .argsz = sizeof(dirty), ++ }; ++ ++ if (start) { ++ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; ++ } else { ++ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); ++ if (ret) { ++ error_report("Failed to set dirty tracking flag 0x%x errno: %d", ++ dirty.flags, errno); ++ } ++} ++ ++static void vfio_listener_log_global_start(MemoryListener 
*listener) ++{ ++ VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ ++ vfio_set_dirty_page_tracking(container, true); ++} ++ ++static void vfio_listener_log_global_stop(MemoryListener *listener) ++{ ++ VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ ++ vfio_set_dirty_page_tracking(container, false); ++} ++ + static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + uint64_t size, ram_addr_t ram_addr) + { +@@ -1093,7 +1122,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, + return; + } + +- if (vfio_devices_all_saving(container)) { ++ if (vfio_devices_all_dirty_tracking(container)) { + vfio_sync_dirty_bitmap(container, section); + } + } +@@ -1101,6 +1130,8 @@ static void vfio_listener_log_sync(MemoryListener *listener, + static const MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, ++ .log_global_start = vfio_listener_log_global_start, ++ .log_global_stop = vfio_listener_log_global_stop, + .log_sync = vfio_listener_log_sync, + }; + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 033cb2b0c9..f1f006d584 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -395,40 +395,10 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) + return qemu_file_get_error(f); + } + +-static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start) +-{ +- int ret; +- VFIOMigration *migration = vbasedev->migration; +- VFIOContainer *container = vbasedev->group->container; +- struct vfio_iommu_type1_dirty_bitmap dirty = { +- .argsz = sizeof(dirty), +- }; +- +- if (start) { +- if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { +- dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; +- } else { +- return -EINVAL; +- } +- } else { +- dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; +- } +- +- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); +- if (ret) { +- 
error_report("Failed to set dirty tracking flag 0x%x errno: %d", +- dirty.flags, errno); +- return -errno; +- } +- return ret; +-} +- + static void vfio_migration_cleanup(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; + +- vfio_set_dirty_page_tracking(vbasedev, false); +- + if (migration->region.mmaps) { + vfio_region_unmap(&migration->region); + } +@@ -469,11 +439,6 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) + return ret; + } + +- ret = vfio_set_dirty_page_tracking(vbasedev, true); +- if (ret) { +- return ret; +- } +- + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + ret = qemu_file_get_error(f); +-- +2.27.0 + diff --git a/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch b/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch new file mode 100644 index 0000000000000000000000000000000000000000..c59bc4e1ff70f6993557329480505c4300ff6aa0 --- /dev/null +++ b/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch @@ -0,0 +1,223 @@ +From f9574b63bf5e940d794db2c3aaf928bde36d9521 Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:05 +0800 +Subject: [PATCH] vfio/migration: Add support for manual clear vfio dirty log + +The new capability VFIO_DIRTY_LOG_MANUAL_CLEAR and the new ioctl +VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and +VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP have been introduced in +the kernel, tweak the userspace side to use them. + +Check if the kernel supports VFIO_DIRTY_LOG_MANUAL_CLEAR and +provide the log_clear() hook for vfio_memory_listener. If the +kernel supports it, deliever the clear message to kernel. 
+ +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 149 +++++++++++++++++++++++++++++++++- + include/hw/vfio/vfio-common.h | 1 + + 2 files changed, 148 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index c33c4c539d..206fb83e28 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1045,7 +1045,9 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); + + dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); +- dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; ++ dbitmap->flags = container->dirty_log_manual_clear ? ++ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR : ++ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; + range->iova = iova; + range->size = size; +@@ -1176,12 +1178,148 @@ static void vfio_listener_log_sync(MemoryListener *listener, + } + } + ++/* ++ * I'm not sure if there's any alignment requirement for the CLEAR_BITMAP ++ * ioctl. But copy from kvm side and align {start, size} with 64 pages. ++ * ++ * I think the code can be simplified a lot if no alignment requirement. ++ */ ++#define VFIO_CLEAR_LOG_SHIFT 6 ++#define VFIO_CLEAR_LOG_ALIGN (qemu_real_host_page_size << VFIO_CLEAR_LOG_SHIFT) ++#define VFIO_CLEAR_LOG_MASK (-VFIO_CLEAR_LOG_ALIGN) ++ ++static int vfio_log_clear_one_range(VFIOContainer *container, ++ VFIODMARange *qrange, uint64_t start, uint64_t size) ++{ ++ struct vfio_iommu_type1_dirty_bitmap *dbitmap; ++ struct vfio_iommu_type1_dirty_bitmap_get *range; ++ ++ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); ++ ++ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); ++ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP; ++ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; ++ ++ /* ++ * Now let's deal with the actual bitmap, which is almost the same ++ * as the kvm side. 
++ */ ++ uint64_t end, bmap_start, start_delta, bmap_npages; ++ unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size; ++ int ret; ++ ++ bmap_start = start & VFIO_CLEAR_LOG_MASK; ++ start_delta = start - bmap_start; ++ bmap_start /= psize; ++ ++ bmap_npages = DIV_ROUND_UP(size + start_delta, VFIO_CLEAR_LOG_ALIGN) ++ << VFIO_CLEAR_LOG_SHIFT; ++ end = qrange->size / psize; ++ if (bmap_npages > end - bmap_start) { ++ bmap_npages = end - bmap_start; ++ } ++ start_delta /= psize; ++ ++ if (start_delta) { ++ bmap_clear = bitmap_new(bmap_npages); ++ bitmap_copy_with_src_offset(bmap_clear, qrange->bitmap, ++ bmap_start, start_delta + size / psize); ++ bitmap_clear(bmap_clear, 0, start_delta); ++ range->bitmap.data = (__u64 *)bmap_clear; ++ } else { ++ range->bitmap.data = (__u64 *)(qrange->bitmap + BIT_WORD(bmap_start)); ++ } ++ ++ range->iova = qrange->iova + bmap_start * psize; ++ range->size = bmap_npages * psize; ++ range->bitmap.size = ROUND_UP(bmap_npages, sizeof(__u64) * BITS_PER_BYTE) / ++ BITS_PER_BYTE; ++ range->bitmap.pgsize = qemu_real_host_page_size; ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); ++ if (ret) { ++ error_report("Failed to clear dirty log for iova: 0x%"PRIx64 ++ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, ++ (uint64_t)range->size, errno); ++ goto err_out; ++ } ++ ++ bitmap_clear(qrange->bitmap, bmap_start + start_delta, size / psize); ++err_out: ++ g_free(bmap_clear); ++ g_free(dbitmap); ++ return 0; ++} ++ ++static int vfio_physical_log_clear(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ uint64_t start, size, offset, count; ++ VFIODMARange *qrange; ++ int ret = 0; ++ ++ if (!container->dirty_log_manual_clear) { ++ /* No need to do explicit clear */ ++ return ret; ++ } ++ ++ start = section->offset_within_address_space; ++ size = int128_get64(section->size); ++ ++ if (!size) { ++ return ret; ++ } ++ ++ QLIST_FOREACH(qrange, &container->dma_list, next) { ++ /* ++ * Discard ranges that 
do not overlap the section (e.g., the ++ * Memory BAR regions of the device) ++ */ ++ if (qrange->iova > start + size - 1 || ++ start > qrange->iova + qrange->size - 1) { ++ continue; ++ } ++ ++ if (start >= qrange->iova) { ++ /* The range starts before section or is aligned to it. */ ++ offset = start - qrange->iova; ++ count = MIN(qrange->size - offset, size); ++ } else { ++ /* The range starts after section. */ ++ offset = 0; ++ count = MIN(qrange->size, size - (qrange->iova - start)); ++ } ++ ret = vfio_log_clear_one_range(container, qrange, offset, count); ++ if (ret < 0) { ++ break; ++ } ++ } ++ ++ return ret; ++} ++ ++static void vfio_listener_log_clear(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ ++ if (vfio_listener_skipped_section(section) || ++ !container->dirty_pages_supported) { ++ return; ++ } ++ ++ if (vfio_devices_all_dirty_tracking(container)) { ++ vfio_physical_log_clear(container, section); ++ } ++} ++ + static const MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, + .log_global_start = vfio_listener_log_global_start, + .log_global_stop = vfio_listener_log_global_stop, + .log_sync = vfio_listener_log_sync, ++ .log_clear = vfio_listener_log_clear, + }; + + static void vfio_listener_release(VFIOContainer *container) +@@ -1563,7 +1701,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, + static int vfio_init_container(VFIOContainer *container, int group_fd, + Error **errp) + { +- int iommu_type, ret; ++ int iommu_type, dirty_log_manual_clear, ret; + + iommu_type = vfio_get_iommu_type(container, errp); + if (iommu_type < 0) { +@@ -1592,6 +1730,13 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + } + + container->iommu_type = iommu_type; ++ ++ dirty_log_manual_clear = ioctl(container->fd, VFIO_CHECK_EXTENSION, ++ VFIO_DIRTY_LOG_MANUAL_CLEAR); 
++ if (dirty_log_manual_clear) { ++ container->dirty_log_manual_clear = dirty_log_manual_clear; ++ } ++ + return 0; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 2853dc861e..1277914ca8 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -93,6 +93,7 @@ typedef struct VFIOContainer { + int error; + bool initialized; + bool dirty_pages_supported; ++ bool dirty_log_manual_clear; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; +-- +2.27.0 + diff --git a/vfio-pci-Implement-return_page_response-page-respons.patch b/vfio-pci-Implement-return_page_response-page-respons.patch new file mode 100644 index 0000000000000000000000000000000000000000..721512e4095c6385efe44279e7e44744ea781899 --- /dev/null +++ b/vfio-pci-Implement-return_page_response-page-respons.patch @@ -0,0 +1,199 @@ +From dab7c3ad6d51e9f0c65d864d6128f62697db4604 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 Nov 2020 12:03:29 -0500 +Subject: [PATCH] vfio/pci: Implement return_page_response page response + callback + +This patch implements the page response path. The +response is written into the page response ring buffer and then +update header's head index is updated. This path is not used +by this series. It is introduced here as a POC for vSVA/ARM +integration. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/pci.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/pci.h | 2 + + 2 files changed, 125 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d1198c8a23..6f4083aec8 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2662,6 +2662,61 @@ out: + g_free(fault_region_info); + } + ++static void vfio_init_fault_response_regions(VFIOPCIDevice *vdev, Error **errp) ++{ ++ struct vfio_region_info *fault_region_info = NULL; ++ struct vfio_region_info_cap_fault *cap_fault; ++ VFIODevice *vbasedev = &vdev->vbasedev; ++ struct vfio_info_cap_header *hdr; ++ char *fault_region_name; ++ int ret; ++ ++ ret = vfio_get_dev_region_info(&vdev->vbasedev, ++ VFIO_REGION_TYPE_NESTED, ++ VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE, ++ &fault_region_info); ++ if (ret) { ++ goto out; ++ } ++ ++ hdr = vfio_get_region_info_cap(fault_region_info, ++ VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE); ++ if (!hdr) { ++ error_setg(errp, "failed to retrieve DMA FAULT RESPONSE capability"); ++ goto out; ++ } ++ cap_fault = container_of(hdr, struct vfio_region_info_cap_fault, ++ header); ++ if (cap_fault->version != 1) { ++ error_setg(errp, "Unsupported DMA FAULT RESPONSE API version %d", ++ cap_fault->version); ++ goto out; ++ } ++ ++ fault_region_name = g_strdup_printf("%s DMA FAULT RESPONSE %d", ++ vbasedev->name, ++ fault_region_info->index); ++ ++ ret = vfio_region_setup(OBJECT(vdev), vbasedev, ++ &vdev->dma_fault_response_region, ++ fault_region_info->index, ++ fault_region_name); ++ g_free(fault_region_name); ++ if (ret) { ++ error_setg_errno(errp, -ret, ++ "failed to set up the DMA FAULT RESPONSE region %d", ++ fault_region_info->index); ++ goto out; ++ } ++ ++ ret = vfio_region_mmap(&vdev->dma_fault_response_region); ++ if (ret) { ++ error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT RESPONSE queue"); ++ } ++out: ++ g_free(fault_region_info); ++} ++ + static void 
vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) + { + VFIODevice *vbasedev = &vdev->vbasedev; +@@ -2737,6 +2792,12 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) + return; + } + ++ vfio_init_fault_response_regions(vdev, &err); ++ if (err) { ++ error_propagate(errp, err); ++ return; ++ } ++ + irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; + + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); +@@ -2915,8 +2976,68 @@ static int vfio_iommu_set_pasid_table(PCIBus *bus, int32_t devfn, + return ioctl(container->fd, VFIO_IOMMU_SET_PASID_TABLE, &info); + } + ++static int vfio_iommu_return_page_response(PCIBus *bus, int32_t devfn, ++ IOMMUPageResponse *resp) ++{ ++ PCIDevice *pdev = bus->devices[devfn]; ++ VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); ++ struct iommu_page_response *response = &resp->resp; ++ struct vfio_region_dma_fault_response header; ++ struct iommu_page_response *queue; ++ char *queue_buffer = NULL; ++ ssize_t bytes; ++ ++ if (!vdev->dma_fault_response_region.mem) { ++ return -EINVAL; ++ } ++ ++ /* read the header */ ++ bytes = pread(vdev->vbasedev.fd, &header, sizeof(header), ++ vdev->dma_fault_response_region.fd_offset); ++ if (bytes != sizeof(header)) { ++ error_report("%s unable to read the fault region header (0x%lx)", ++ __func__, bytes); ++ return -1; ++ } ++ ++ /* Normally the fault queue is mmapped */ ++ queue = (struct iommu_page_response *)vdev->dma_fault_response_region.mmaps[0].mmap; ++ if (!queue) { ++ size_t queue_size = header.nb_entries * header.entry_size; ++ ++ error_report("%s: fault queue not mmapped: slower fault handling", ++ vdev->vbasedev.name); ++ ++ queue_buffer = g_malloc(queue_size); ++ bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size, ++ vdev->dma_fault_response_region.fd_offset + header.offset); ++ if (bytes != queue_size) { ++ error_report("%s unable to read the fault queue (0x%lx)", ++ __func__, bytes); ++ return -1; ++ } ++ ++ queue = (struct 
iommu_page_response *)queue_buffer; ++ } ++ /* deposit the new response in the queue and increment the head */ ++ memcpy(queue + header.head, response, header.entry_size); ++ ++ vdev->fault_response_head_index = ++ (vdev->fault_response_head_index + 1) % header.nb_entries; ++ bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_response_head_index, 4, ++ vdev->dma_fault_response_region.fd_offset); ++ if (bytes != 4) { ++ error_report("%s unable to write the fault response region head index (0x%lx)", ++ __func__, bytes); ++ } ++ g_free(queue_buffer); ++ ++ return 0; ++} ++ + static PCIPASIDOps vfio_pci_pasid_ops = { + .set_pasid_table = vfio_iommu_set_pasid_table, ++ .return_page_response = vfio_iommu_return_page_response, + }; + + static void vfio_dma_fault_notifier_handler(void *opaque) +@@ -3373,6 +3494,7 @@ static void vfio_instance_finalize(Object *obj) + vfio_display_finalize(vdev); + vfio_bars_finalize(vdev); + vfio_region_finalize(&vdev->dma_fault_region); ++ vfio_region_finalize(&vdev->dma_fault_response_region); + g_free(vdev->emulated_config_bits); + g_free(vdev->rom); + /* +@@ -3394,6 +3516,7 @@ static void vfio_exitfn(PCIDevice *pdev) + vfio_unregister_err_notifier(vdev); + vfio_unregister_ext_irq_notifiers(vdev); + vfio_region_exit(&vdev->dma_fault_region); ++ vfio_region_exit(&vdev->dma_fault_response_region); + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + vfio_disable_interrupts(vdev); + if (vdev->intx.mmap_timer) { +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index e31bc0173a..7fdcfa0dc8 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -143,6 +143,8 @@ typedef struct VFIOPCIDevice { + VFIOPCIExtIRQ *ext_irqs; + VFIORegion dma_fault_region; + uint32_t fault_tail_index; ++ VFIORegion dma_fault_response_region; ++ uint32_t fault_response_head_index; + int (*resetfn)(struct VFIOPCIDevice *); + uint32_t vendor_id; + uint32_t device_id; +-- +2.27.0 + diff --git a/vfio-pci-Implement-the-DMA-fault-handler.patch 
b/vfio-pci-Implement-the-DMA-fault-handler.patch new file mode 100644 index 0000000000000000000000000000000000000000..ca61b01c4469cd30c3b4b781c2cc527b48c45e80 --- /dev/null +++ b/vfio-pci-Implement-the-DMA-fault-handler.patch @@ -0,0 +1,96 @@ +From 139d0b3474c29427fea4a0ed47f51c01a76a8636 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 5 Mar 2019 16:35:32 +0100 +Subject: [PATCH] vfio/pci: Implement the DMA fault handler + +Whenever the eventfd is triggered, we retrieve the DMA fault(s) +from the mmapped fault region and inject them in the iommu +memory region. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/pci.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/pci.h | 1 + + 2 files changed, 51 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 0db7d68258..d1198c8a23 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2922,10 +2922,60 @@ static PCIPASIDOps vfio_pci_pasid_ops = { + static void vfio_dma_fault_notifier_handler(void *opaque) + { + VFIOPCIExtIRQ *ext_irq = opaque; ++ VFIOPCIDevice *vdev = ext_irq->vdev; ++ PCIDevice *pdev = &vdev->pdev; ++ AddressSpace *as = pci_device_iommu_address_space(pdev); ++ IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(as->root); ++ struct vfio_region_dma_fault header; ++ struct iommu_fault *queue; ++ char *queue_buffer = NULL; ++ ssize_t bytes; + + if (!event_notifier_test_and_clear(&ext_irq->notifier)) { + return; + } ++ ++ bytes = pread(vdev->vbasedev.fd, &header, sizeof(header), ++ vdev->dma_fault_region.fd_offset); ++ if (bytes != sizeof(header)) { ++ error_report("%s unable to read the fault region header (0x%lx)", ++ __func__, bytes); ++ return; ++ } ++ ++ /* Normally the fault queue is mmapped */ ++ queue = (struct iommu_fault *)vdev->dma_fault_region.mmaps[0].mmap; ++ if (!queue) { ++ size_t queue_size = header.nb_entries * header.entry_size; ++ ++ error_report("%s: fault queue not mmapped: slower fault handling", ++ vdev->vbasedev.name); ++ ++ 
queue_buffer = g_malloc(queue_size); ++ bytes = pread(vdev->vbasedev.fd, queue_buffer, queue_size, ++ vdev->dma_fault_region.fd_offset + header.offset); ++ if (bytes != queue_size) { ++ error_report("%s unable to read the fault queue (0x%lx)", ++ __func__, bytes); ++ return; ++ } ++ ++ queue = (struct iommu_fault *)queue_buffer; ++ } ++ ++ while (vdev->fault_tail_index != header.head) { ++ memory_region_inject_faults(iommu_mr, 1, ++ &queue[vdev->fault_tail_index]); ++ vdev->fault_tail_index = ++ (vdev->fault_tail_index + 1) % header.nb_entries; ++ } ++ bytes = pwrite(vdev->vbasedev.fd, &vdev->fault_tail_index, 4, ++ vdev->dma_fault_region.fd_offset); ++ if (bytes != 4) { ++ error_report("%s unable to write the fault region tail index (0x%lx)", ++ __func__, bytes); ++ } ++ g_free(queue_buffer); + } + + static int vfio_register_ext_irq_handler(VFIOPCIDevice *vdev, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 815154656c..e31bc0173a 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -142,6 +142,7 @@ typedef struct VFIOPCIDevice { + EventNotifier req_notifier; + VFIOPCIExtIRQ *ext_irqs; + VFIORegion dma_fault_region; ++ uint32_t fault_tail_index; + int (*resetfn)(struct VFIOPCIDevice *); + uint32_t vendor_id; + uint32_t device_id; +-- +2.27.0 + diff --git a/vfio-pci-Register-handler-for-iommu-fault.patch b/vfio-pci-Register-handler-for-iommu-fault.patch new file mode 100644 index 0000000000000000000000000000000000000000..feea0a347baad96a592cefba3dd6957947d1505d --- /dev/null +++ b/vfio-pci-Register-handler-for-iommu-fault.patch @@ -0,0 +1,168 @@ +From 65b96da46d2c5dfdcf3a4618cf75ca94345164d7 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 13 Dec 2018 04:39:30 -0500 +Subject: [PATCH] vfio/pci: Register handler for iommu fault + +We use the new extended IRQ VFIO_IRQ_TYPE_NESTED type and +VFIO_IRQ_SUBTYPE_DMA_FAULT subtype to set/unset +a notifier for physical DMA faults. 
The associated eventfd is +triggered, in nested mode, whenever a fault is detected at IOMMU +physical level. + +The actual handler will be implemented in subsequent patches. + +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/pci.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++- + hw/vfio/pci.h | 7 +++++ + 2 files changed, 87 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index bbcba3fd16..f5c05d508d 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2857,6 +2857,76 @@ static PCIPASIDOps vfio_pci_pasid_ops = { + .set_pasid_table = vfio_iommu_set_pasid_table, + }; + ++static void vfio_dma_fault_notifier_handler(void *opaque) ++{ ++ VFIOPCIExtIRQ *ext_irq = opaque; ++ ++ if (!event_notifier_test_and_clear(&ext_irq->notifier)) { ++ return; ++ } ++} ++ ++static int vfio_register_ext_irq_handler(VFIOPCIDevice *vdev, ++ uint32_t type, uint32_t subtype, ++ IOHandler *handler) ++{ ++ int32_t fd, ext_irq_index, index; ++ struct vfio_irq_info *irq_info; ++ Error *err = NULL; ++ EventNotifier *n; ++ int ret; ++ ++ ret = vfio_get_dev_irq_info(&vdev->vbasedev, type, subtype, &irq_info); ++ if (ret) { ++ return ret; ++ } ++ index = irq_info->index; ++ ext_irq_index = irq_info->index - VFIO_PCI_NUM_IRQS; ++ g_free(irq_info); ++ ++ vdev->ext_irqs[ext_irq_index].vdev = vdev; ++ vdev->ext_irqs[ext_irq_index].index = index; ++ n = &vdev->ext_irqs[ext_irq_index].notifier; ++ ++ ret = event_notifier_init(n, 0); ++ if (ret) { ++ error_report("vfio: Unable to init event notifier for ext irq %d(%d)", ++ ext_irq_index, ret); ++ return ret; ++ } ++ ++ fd = event_notifier_get_fd(n); ++ qemu_set_fd_handler(fd, vfio_dma_fault_notifier_handler, NULL, ++ &vdev->ext_irqs[ext_irq_index]); ++ ++ ret = vfio_set_irq_signaling(&vdev->vbasedev, index, 0, ++ VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err); ++ if (ret) { ++ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); ++ qemu_set_fd_handler(fd, NULL, NULL, vdev); ++ 
event_notifier_cleanup(n); ++ } ++ return ret; ++} ++ ++static void vfio_unregister_ext_irq_notifiers(VFIOPCIDevice *vdev) ++{ ++ VFIODevice *vbasedev = &vdev->vbasedev; ++ Error *err = NULL; ++ int i; ++ ++ for (i = 0; i < vbasedev->num_irqs - VFIO_PCI_NUM_IRQS; i++) { ++ if (vfio_set_irq_signaling(vbasedev, i + VFIO_PCI_NUM_IRQS , 0, ++ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { ++ error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); ++ } ++ qemu_set_fd_handler(event_notifier_get_fd(&vdev->ext_irqs[i].notifier), ++ NULL, NULL, vdev); ++ event_notifier_cleanup(&vdev->ext_irqs[i].notifier); ++ } ++ g_free(vdev->ext_irqs); ++} ++ + static void vfio_realize(PCIDevice *pdev, Error **errp) + { + VFIOPCIDevice *vdev = PCI_VFIO(pdev); +@@ -2867,7 +2937,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int i, ret, nb_ext_irqs; + bool is_mdev; + + if (!vdev->vbasedev.sysfsdev) { +@@ -2955,6 +3025,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + goto error; + } + ++ nb_ext_irqs = vdev->vbasedev.num_irqs - VFIO_PCI_NUM_IRQS; ++ if (nb_ext_irqs > 0) { ++ vdev->ext_irqs = g_new0(VFIOPCIExtIRQ, nb_ext_irqs); ++ } ++ + vfio_populate_device(vdev, &err); + if (err) { + error_propagate(errp, err); +@@ -3161,6 +3236,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + + vfio_register_err_notifier(vdev); + vfio_register_req_notifier(vdev); ++ vfio_register_ext_irq_handler(vdev, VFIO_IRQ_TYPE_NESTED, ++ VFIO_IRQ_SUBTYPE_DMA_FAULT, ++ vfio_dma_fault_notifier_handler); + vfio_setup_resetfn_quirk(vdev); + + pci_setup_pasid_ops(pdev, &vfio_pci_pasid_ops); +@@ -3201,6 +3279,7 @@ static void vfio_exitfn(PCIDevice *pdev) + + vfio_unregister_req_notifier(vdev); + vfio_unregister_err_notifier(vdev); ++ vfio_unregister_ext_irq_notifiers(vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + vfio_disable_interrupts(vdev); + if (vdev->intx.mmap_timer) { +diff --git 
a/hw/vfio/pci.h b/hw/vfio/pci.h +index 834a90d646..893d074375 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -113,6 +113,12 @@ typedef struct VFIOMSIXInfo { + unsigned long *pending; + } VFIOMSIXInfo; + ++typedef struct VFIOPCIExtIRQ { ++ struct VFIOPCIDevice *vdev; ++ EventNotifier notifier; ++ uint32_t index; ++} VFIOPCIExtIRQ; ++ + typedef struct VFIOPCIDevice { + PCIDevice pdev; + VFIODevice vbasedev; +@@ -134,6 +140,7 @@ typedef struct VFIOPCIDevice { + PCIHostDeviceAddress host; + EventNotifier err_notifier; + EventNotifier req_notifier; ++ VFIOPCIExtIRQ *ext_irqs; + int (*resetfn)(struct VFIOPCIDevice *); + uint32_t vendor_id; + uint32_t device_id; +-- +2.27.0 + diff --git a/vfio-pci-Set-up-the-DMA-FAULT-region.patch b/vfio-pci-Set-up-the-DMA-FAULT-region.patch new file mode 100644 index 0000000000000000000000000000000000000000..ae70a0696cb8310e2669b7e75d2e12bf8e9911f8 --- /dev/null +++ b/vfio-pci-Set-up-the-DMA-FAULT-region.patch @@ -0,0 +1,132 @@ +From e44d9cc377848f0a560b6d114561852e95fab557 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 13 Dec 2018 10:57:53 -0500 +Subject: [PATCH] vfio/pci: Set up the DMA FAULT region + +Set up the fault region which is composed of the actual fault +queue (mmappable) and a header used to handle it. The fault +queue is mmapped. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Kunkun Jiang +--- + hw/vfio/pci.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/pci.h | 1 + + 2 files changed, 65 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index f5c05d508d..0db7d68258 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2607,11 +2607,67 @@ int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) + return 0; + } + ++static void vfio_init_fault_regions(VFIOPCIDevice *vdev, Error **errp) ++{ ++ struct vfio_region_info *fault_region_info = NULL; ++ struct vfio_region_info_cap_fault *cap_fault; ++ VFIODevice *vbasedev = &vdev->vbasedev; ++ struct vfio_info_cap_header *hdr; ++ char *fault_region_name; ++ int ret; ++ ++ ret = vfio_get_dev_region_info(&vdev->vbasedev, ++ VFIO_REGION_TYPE_NESTED, ++ VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT, ++ &fault_region_info); ++ if (ret) { ++ goto out; ++ } ++ ++ hdr = vfio_get_region_info_cap(fault_region_info, ++ VFIO_REGION_INFO_CAP_DMA_FAULT); ++ if (!hdr) { ++ error_setg(errp, "failed to retrieve DMA FAULT capability"); ++ goto out; ++ } ++ cap_fault = container_of(hdr, struct vfio_region_info_cap_fault, ++ header); ++ if (cap_fault->version != 1) { ++ error_setg(errp, "Unsupported DMA FAULT API version %d", ++ cap_fault->version); ++ goto out; ++ } ++ ++ fault_region_name = g_strdup_printf("%s DMA FAULT %d", ++ vbasedev->name, ++ fault_region_info->index); ++ ++ ret = vfio_region_setup(OBJECT(vdev), vbasedev, ++ &vdev->dma_fault_region, ++ fault_region_info->index, ++ fault_region_name); ++ g_free(fault_region_name); ++ if (ret) { ++ error_setg_errno(errp, -ret, ++ "failed to set up the DMA FAULT region %d", ++ fault_region_info->index); ++ goto out; ++ } ++ ++ ret = vfio_region_mmap(&vdev->dma_fault_region); ++ if (ret) { ++ error_setg_errno(errp, -ret, "Failed to mmap the DMA FAULT queue"); ++ } ++out: ++ g_free(fault_region_info); ++} ++ + static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) + { + VFIODevice 
*vbasedev = &vdev->vbasedev; + struct vfio_region_info *reg_info; + struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) }; ++ Error *err = NULL; + int i, ret = -1; + + /* Sanity check device */ +@@ -2675,6 +2731,12 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) + } + } + ++ vfio_init_fault_regions(vdev, &err); ++ if (err) { ++ error_propagate(errp, err); ++ return; ++ } ++ + irq_info.index = VFIO_PCI_ERR_IRQ_INDEX; + + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); +@@ -3260,6 +3322,7 @@ static void vfio_instance_finalize(Object *obj) + + vfio_display_finalize(vdev); + vfio_bars_finalize(vdev); ++ vfio_region_finalize(&vdev->dma_fault_region); + g_free(vdev->emulated_config_bits); + g_free(vdev->rom); + /* +@@ -3280,6 +3343,7 @@ static void vfio_exitfn(PCIDevice *pdev) + vfio_unregister_req_notifier(vdev); + vfio_unregister_err_notifier(vdev); + vfio_unregister_ext_irq_notifiers(vdev); ++ vfio_region_exit(&vdev->dma_fault_region); + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + vfio_disable_interrupts(vdev); + if (vdev->intx.mmap_timer) { +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 893d074375..815154656c 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -141,6 +141,7 @@ typedef struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + VFIOPCIExtIRQ *ext_irqs; ++ VFIORegion dma_fault_region; + int (*resetfn)(struct VFIOPCIDevice *); + uint32_t vendor_id; + uint32_t device_id; +-- +2.27.0 + diff --git a/vfio.h-and-iommu.h-header-update-against-5.10.patch b/vfio.h-and-iommu.h-header-update-against-5.10.patch new file mode 100644 index 0000000000000000000000000000000000000000..721f2b6fcbc9de84c77b59ddf68da60d3d1fd255 --- /dev/null +++ b/vfio.h-and-iommu.h-header-update-against-5.10.patch @@ -0,0 +1,760 @@ +From 95435c6778f38dee9ed6f3ee6fd9e022107315d7 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Fri, 30 Jul 2021 09:15:31 +0800 +Subject: [PATCH] vfio.h and 
iommu.h header update against 5.10 + +Signed-off-by: Kunkun Jiang +--- + linux-headers/linux/iommu.h | 395 ++++++++++++++++++++++++++++++++++++ + linux-headers/linux/vfio.h | 249 ++++++++++++++++++++++- + 2 files changed, 641 insertions(+), 3 deletions(-) + create mode 100644 linux-headers/linux/iommu.h + +diff --git a/linux-headers/linux/iommu.h b/linux-headers/linux/iommu.h +new file mode 100644 +index 0000000000..773b7dc2d6 +--- /dev/null ++++ b/linux-headers/linux/iommu.h +@@ -0,0 +1,395 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * IOMMU user API definitions ++ */ ++ ++#ifndef IOMMU_H ++#define IOMMU_H ++ ++#include ++ ++#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ ++#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ ++#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ ++#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ ++ ++/* Generic fault types, can be expanded IRQ remapping fault */ ++enum iommu_fault_type { ++ IOMMU_FAULT_DMA_UNRECOV = 1, /* unrecoverable fault */ ++ IOMMU_FAULT_PAGE_REQ, /* page request fault */ ++}; ++ ++enum iommu_fault_reason { ++ IOMMU_FAULT_REASON_UNKNOWN = 0, ++ ++ /* Could not access the PASID table (fetch caused external abort) */ ++ IOMMU_FAULT_REASON_PASID_FETCH, ++ ++ /* PASID entry is invalid or has configuration errors */ ++ IOMMU_FAULT_REASON_BAD_PASID_ENTRY, ++ ++ /* ++ * PASID is out of range (e.g. exceeds the maximum PASID ++ * supported by the IOMMU) or disabled. 
++ */ ++ IOMMU_FAULT_REASON_PASID_INVALID, ++ ++ /* ++ * An external abort occurred fetching (or updating) a translation ++ * table descriptor ++ */ ++ IOMMU_FAULT_REASON_WALK_EABT, ++ ++ /* ++ * Could not access the page table entry (Bad address), ++ * actual translation fault ++ */ ++ IOMMU_FAULT_REASON_PTE_FETCH, ++ ++ /* Protection flag check failed */ ++ IOMMU_FAULT_REASON_PERMISSION, ++ ++ /* access flag check failed */ ++ IOMMU_FAULT_REASON_ACCESS, ++ ++ /* Output address of a translation stage caused Address Size fault */ ++ IOMMU_FAULT_REASON_OOR_ADDRESS, ++}; ++ ++/** ++ * struct iommu_fault_unrecoverable - Unrecoverable fault data ++ * @reason: reason of the fault, from &enum iommu_fault_reason ++ * @flags: parameters of this fault (IOMMU_FAULT_UNRECOV_* values) ++ * @pasid: Process Address Space ID ++ * @perm: requested permission access using by the incoming transaction ++ * (IOMMU_FAULT_PERM_* values) ++ * @addr: offending page address ++ * @fetch_addr: address that caused a fetch abort, if any ++ */ ++struct iommu_fault_unrecoverable { ++ __u32 reason; ++#define IOMMU_FAULT_UNRECOV_PASID_VALID (1 << 0) ++#define IOMMU_FAULT_UNRECOV_ADDR_VALID (1 << 1) ++#define IOMMU_FAULT_UNRECOV_FETCH_ADDR_VALID (1 << 2) ++ __u32 flags; ++ __u32 pasid; ++ __u32 perm; ++ __u64 addr; ++ __u64 fetch_addr; ++}; ++ ++/** ++ * struct iommu_fault_page_request - Page Request data ++ * @flags: encodes whether the corresponding fields are valid and whether this ++ * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values). ++ * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response ++ * must have the same PASID value as the page request. When it is clear, ++ * the page response should not have a PASID. 
++ * @pasid: Process Address Space ID ++ * @grpid: Page Request Group Index ++ * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) ++ * @addr: page address ++ * @private_data: device-specific private information ++ */ ++struct iommu_fault_page_request { ++#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) ++#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) ++#define IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA (1 << 2) ++#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 3) ++ __u32 flags; ++ __u32 pasid; ++ __u32 grpid; ++ __u32 perm; ++ __u64 addr; ++ __u64 private_data[2]; ++}; ++ ++/** ++ * struct iommu_fault - Generic fault data ++ * @type: fault type from &enum iommu_fault_type ++ * @padding: reserved for future use (should be zero) ++ * @event: fault event, when @type is %IOMMU_FAULT_DMA_UNRECOV ++ * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ ++ * @padding2: sets the fault size to allow for future extensions ++ */ ++struct iommu_fault { ++ __u32 type; ++ __u32 padding; ++ union { ++ struct iommu_fault_unrecoverable event; ++ struct iommu_fault_page_request prm; ++ __u8 padding2[56]; ++ }; ++}; ++ ++/** ++ * enum iommu_page_response_code - Return status of fault handlers ++ * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables ++ * populated, retry the access. This is "Success" in PCI PRI. ++ * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from ++ * this device if possible. This is "Response Failure" in PCI PRI. ++ * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the ++ * access. This is "Invalid Request" in PCI PRI. 
++ */ ++enum iommu_page_response_code { ++ IOMMU_PAGE_RESP_SUCCESS = 0, ++ IOMMU_PAGE_RESP_INVALID, ++ IOMMU_PAGE_RESP_FAILURE, ++}; ++ ++/** ++ * struct iommu_page_response - Generic page response information ++ * @argsz: User filled size of this data ++ * @version: API version of this structure ++ * @flags: encodes whether the corresponding fields are valid ++ * (IOMMU_FAULT_PAGE_RESPONSE_* values) ++ * @pasid: Process Address Space ID ++ * @grpid: Page Request Group Index ++ * @code: response code from &enum iommu_page_response_code ++ */ ++struct iommu_page_response { ++ __u32 argsz; ++#define IOMMU_PAGE_RESP_VERSION_1 1 ++ __u32 version; ++#define IOMMU_PAGE_RESP_PASID_VALID (1 << 0) ++ __u32 flags; ++ __u32 pasid; ++ __u32 grpid; ++ __u32 code; ++}; ++ ++/* defines the granularity of the invalidation */ ++enum iommu_inv_granularity { ++ IOMMU_INV_GRANU_DOMAIN, /* domain-selective invalidation */ ++ IOMMU_INV_GRANU_PASID, /* PASID-selective invalidation */ ++ IOMMU_INV_GRANU_ADDR, /* page-selective invalidation */ ++ IOMMU_INV_GRANU_NR, /* number of invalidation granularities */ ++}; ++ ++/** ++ * struct iommu_inv_addr_info - Address Selective Invalidation Structure ++ * ++ * @flags: indicates the granularity of the address-selective invalidation ++ * - If the PASID bit is set, the @pasid field is populated and the invalidation ++ * relates to cache entries tagged with this PASID and matching the address ++ * range. ++ * - If ARCHID bit is set, @archid is populated and the invalidation relates ++ * to cache entries tagged with this architecture specific ID and matching ++ * the address range. ++ * - Both PASID and ARCHID can be set as they may tag different caches. ++ * - If neither PASID or ARCHID is set, global addr invalidation applies. ++ * - The LEAF flag indicates whether only the leaf PTE caching needs to be ++ * invalidated and other paging structure caches can be preserved. 
++ * @pasid: process address space ID ++ * @archid: architecture-specific ID ++ * @addr: first stage/level input address ++ * @granule_size: page/block size of the mapping in bytes ++ * @nb_granules: number of contiguous granules to be invalidated ++ */ ++struct iommu_inv_addr_info { ++#define IOMMU_INV_ADDR_FLAGS_PASID (1 << 0) ++#define IOMMU_INV_ADDR_FLAGS_ARCHID (1 << 1) ++#define IOMMU_INV_ADDR_FLAGS_LEAF (1 << 2) ++ __u32 flags; ++ __u32 archid; ++ __u64 pasid; ++ __u64 addr; ++ __u64 granule_size; ++ __u64 nb_granules; ++}; ++ ++/** ++ * struct iommu_inv_pasid_info - PASID Selective Invalidation Structure ++ * ++ * @flags: indicates the granularity of the PASID-selective invalidation ++ * - If the PASID bit is set, the @pasid field is populated and the invalidation ++ * relates to cache entries tagged with this PASID and matching the address ++ * range. ++ * - If the ARCHID bit is set, the @archid is populated and the invalidation ++ * relates to cache entries tagged with this architecture specific ID and ++ * matching the address range. ++ * - Both PASID and ARCHID can be set as they may tag different caches. ++ * - At least one of PASID or ARCHID must be set. 
++ * @pasid: process address space ID ++ * @archid: architecture-specific ID ++ */ ++struct iommu_inv_pasid_info { ++#define IOMMU_INV_PASID_FLAGS_PASID (1 << 0) ++#define IOMMU_INV_PASID_FLAGS_ARCHID (1 << 1) ++ __u32 flags; ++ __u32 archid; ++ __u64 pasid; ++}; ++ ++/** ++ * struct iommu_cache_invalidate_info - First level/stage invalidation ++ * information ++ * @argsz: User filled size of this data ++ * @version: API version of this structure ++ * @cache: bitfield that allows to select which caches to invalidate ++ * @granularity: defines the lowest granularity used for the invalidation: ++ * domain > PASID > addr ++ * @padding: reserved for future use (should be zero) ++ * @pasid_info: invalidation data when @granularity is %IOMMU_INV_GRANU_PASID ++ * @addr_info: invalidation data when @granularity is %IOMMU_INV_GRANU_ADDR ++ * ++ * Not all the combinations of cache/granularity are valid: ++ * ++ * +--------------+---------------+---------------+---------------+ ++ * | type / | DEV_IOTLB | IOTLB | PASID | ++ * | granularity | | | cache | ++ * +==============+===============+===============+===============+ ++ * | DOMAIN | N/A | Y | Y | ++ * +--------------+---------------+---------------+---------------+ ++ * | PASID | Y | Y | Y | ++ * +--------------+---------------+---------------+---------------+ ++ * | ADDR | Y | Y | N/A | ++ * +--------------+---------------+---------------+---------------+ ++ * ++ * Invalidations by %IOMMU_INV_GRANU_DOMAIN don't take any argument other than ++ * @version and @cache. ++ * ++ * If multiple cache types are invalidated simultaneously, they all ++ * must support the used granularity. 
++ */ ++struct iommu_cache_invalidate_info { ++ __u32 argsz; ++#define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 ++ __u32 version; ++/* IOMMU paging structure cache */ ++#define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ ++#define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ ++#define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ ++#define IOMMU_CACHE_INV_TYPE_NR (3) ++ __u8 cache; ++ __u8 granularity; ++ __u8 padding[6]; ++ union { ++ struct iommu_inv_pasid_info pasid_info; ++ struct iommu_inv_addr_info addr_info; ++ } granu; ++}; ++ ++/** ++ * struct iommu_gpasid_bind_data_vtd - Intel VT-d specific data on device and guest ++ * SVA binding. ++ * ++ * @flags: VT-d PASID table entry attributes ++ * @pat: Page attribute table data to compute effective memory type ++ * @emt: Extended memory type ++ * ++ * Only guest vIOMMU selectable and effective options are passed down to ++ * the host IOMMU. ++ */ ++struct iommu_gpasid_bind_data_vtd { ++#define IOMMU_SVA_VTD_GPASID_SRE (1 << 0) /* supervisor request */ ++#define IOMMU_SVA_VTD_GPASID_EAFE (1 << 1) /* extended access enable */ ++#define IOMMU_SVA_VTD_GPASID_PCD (1 << 2) /* page-level cache disable */ ++#define IOMMU_SVA_VTD_GPASID_PWT (1 << 3) /* page-level write through */ ++#define IOMMU_SVA_VTD_GPASID_EMTE (1 << 4) /* extended mem type enable */ ++#define IOMMU_SVA_VTD_GPASID_CD (1 << 5) /* PASID-level cache disable */ ++#define IOMMU_SVA_VTD_GPASID_LAST (1 << 6) ++ __u64 flags; ++ __u32 pat; ++ __u32 emt; ++}; ++ ++#define IOMMU_SVA_VTD_GPASID_MTS_MASK (IOMMU_SVA_VTD_GPASID_CD | \ ++ IOMMU_SVA_VTD_GPASID_EMTE | \ ++ IOMMU_SVA_VTD_GPASID_PCD | \ ++ IOMMU_SVA_VTD_GPASID_PWT) ++ ++/** ++ * struct iommu_gpasid_bind_data - Information about device and guest PASID binding ++ * @argsz: User filled size of this data ++ * @version: Version of this data structure ++ * @format: PASID table entry format ++ * @flags: Additional information on guest bind request ++ * @gpgd: Guest page directory 
base of the guest mm to bind ++ * @hpasid: Process address space ID used for the guest mm in host IOMMU ++ * @gpasid: Process address space ID used for the guest mm in guest IOMMU ++ * @addr_width: Guest virtual address width ++ * @padding: Reserved for future use (should be zero) ++ * @vtd: Intel VT-d specific data ++ * ++ * Guest to host PASID mapping can be an identity or non-identity, where guest ++ * has its own PASID space. For non-identify mapping, guest to host PASID lookup ++ * is needed when VM programs guest PASID into an assigned device. VMM may ++ * trap such PASID programming then request host IOMMU driver to convert guest ++ * PASID to host PASID based on this bind data. ++ */ ++struct iommu_gpasid_bind_data { ++ __u32 argsz; ++#define IOMMU_GPASID_BIND_VERSION_1 1 ++ __u32 version; ++#define IOMMU_PASID_FORMAT_INTEL_VTD 1 ++#define IOMMU_PASID_FORMAT_LAST 2 ++ __u32 format; ++ __u32 addr_width; ++#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */ ++ __u64 flags; ++ __u64 gpgd; ++ __u64 hpasid; ++ __u64 gpasid; ++ __u8 padding[8]; ++ /* Vendor specific data */ ++ union { ++ struct iommu_gpasid_bind_data_vtd vtd; ++ } vendor; ++}; ++ ++/** ++ * struct iommu_pasid_smmuv3 - ARM SMMUv3 Stream Table Entry stage 1 related ++ * information ++ * @version: API version of this structure ++ * @s1fmt: STE s1fmt (format of the CD table: single CD, linear table ++ * or 2-level table) ++ * @s1dss: STE s1dss (specifies the behavior when @pasid_bits != 0 ++ * and no PASID is passed along with the incoming transaction) ++ * @padding: reserved for future use (should be zero) ++ * ++ * The PASID table is referred to as the Context Descriptor (CD) table on ARM ++ * SMMUv3. Please refer to the ARM SMMU 3.x spec (ARM IHI 0070A) for full ++ * details. 
++ */ ++struct iommu_pasid_smmuv3 { ++#define PASID_TABLE_SMMUV3_CFG_VERSION_1 1 ++ __u32 version; ++ __u8 s1fmt; ++ __u8 s1dss; ++ __u8 padding[2]; ++}; ++ ++/** ++ * struct iommu_pasid_table_config - PASID table data used to bind guest PASID ++ * table to the host IOMMU ++ * @argsz: User filled size of this data ++ * @version: API version to prepare for future extensions ++ * @base_ptr: guest physical address of the PASID table ++ * @format: format of the PASID table ++ * @pasid_bits: number of PASID bits used in the PASID table ++ * @config: indicates whether the guest translation stage must ++ * be translated, bypassed or aborted. ++ * @padding: reserved for future use (should be zero) ++ * @vendor_data.smmuv3: table information when @format is ++ * %IOMMU_PASID_FORMAT_SMMUV3 ++ */ ++struct iommu_pasid_table_config { ++ __u32 argsz; ++#define PASID_TABLE_CFG_VERSION_1 1 ++ __u32 version; ++ __u64 base_ptr; ++#define IOMMU_PASID_FORMAT_SMMUV3 1 ++ __u32 format; ++ __u8 pasid_bits; ++#define IOMMU_PASID_CONFIG_TRANSLATE 1 ++#define IOMMU_PASID_CONFIG_BYPASS 2 ++#define IOMMU_PASID_CONFIG_ABORT 3 ++ __u8 config; ++ __u8 padding[2]; ++ union { ++ struct iommu_pasid_smmuv3 smmuv3; ++ } vendor_data; ++}; ++ ++#endif /* _UAPI_IOMMU_H */ +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 120387ba58..d6edfbd2f5 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -14,6 +14,7 @@ + + #include + #include ++#include + + #define VFIO_API_VERSION 0 + +@@ -211,8 +212,11 @@ struct vfio_device_info { + #define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ + #define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ + #define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ ++#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ ++#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ ++ __u32 cap_offset; 
/* Offset within info struct of first cap */ + }; + #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) + +@@ -228,6 +232,15 @@ struct vfio_device_info { + #define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" + #define VFIO_DEVICE_API_AP_STRING "vfio-ap" + ++/* ++ * The following capabilities are unique to s390 zPCI devices. Their contents ++ * are further-defined in vfio_zdev.h ++ */ ++#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 ++#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 ++#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 ++#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 ++ + /** + * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, + * struct vfio_region_info) +@@ -316,6 +329,7 @@ struct vfio_region_info_cap_type { + #define VFIO_REGION_TYPE_GFX (1) + #define VFIO_REGION_TYPE_CCW (2) + #define VFIO_REGION_TYPE_MIGRATION (3) ++#define VFIO_REGION_TYPE_NESTED (4) + + /* sub-types for VFIO_REGION_TYPE_PCI_* */ + +@@ -340,6 +354,10 @@ struct vfio_region_info_cap_type { + /* sub-types for VFIO_REGION_TYPE_GFX */ + #define VFIO_REGION_SUBTYPE_GFX_EDID (1) + ++/* sub-types for VFIO_REGION_TYPE_NESTED */ ++#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT (1) ++#define VFIO_REGION_SUBTYPE_NESTED_DMA_FAULT_RESPONSE (2) ++ + /** + * struct vfio_region_gfx_edid - EDID region layout. + * +@@ -472,7 +490,7 @@ struct vfio_region_gfx_edid { + * 5. Resumed + * |--------->| + * +- * 0. Default state of VFIO device is _RUNNNG when the user application starts. ++ * 0. Default state of VFIO device is _RUNNING when the user application starts. + * 1. During normal shutdown of the user application, the user application may + * optionally change the VFIO device state from _RUNNING to _STOP. This + * transition is optional. 
The vendor driver must support this transition but +@@ -695,11 +713,30 @@ struct vfio_irq_info { + #define VFIO_IRQ_INFO_MASKABLE (1 << 1) + #define VFIO_IRQ_INFO_AUTOMASKED (1 << 2) + #define VFIO_IRQ_INFO_NORESIZE (1 << 3) ++#define VFIO_IRQ_INFO_FLAG_CAPS (1 << 4) /* Info supports caps */ + __u32 index; /* IRQ index */ + __u32 count; /* Number of IRQs within this index */ ++ __u32 cap_offset; /* Offset within info struct of first cap */ + }; + #define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9) + ++/* ++ * The irq type capability allows IRQs unique to a specific device or ++ * class of devices to be exposed. ++ * ++ * The structures below define version 1 of this capability. ++ */ ++#define VFIO_IRQ_INFO_CAP_TYPE 3 ++ ++struct vfio_irq_info_cap_type { ++ struct vfio_info_cap_header header; ++ __u32 type; /* global per bus driver */ ++ __u32 subtype; /* type specific */ ++}; ++ ++#define VFIO_IRQ_TYPE_NESTED (1) ++#define VFIO_IRQ_SUBTYPE_DMA_FAULT (1) ++ + /** + * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set) + * +@@ -801,7 +838,8 @@ enum { + VFIO_PCI_MSIX_IRQ_INDEX, + VFIO_PCI_ERR_IRQ_INDEX, + VFIO_PCI_REQ_IRQ_INDEX, +- VFIO_PCI_NUM_IRQS ++ VFIO_PCI_NUM_IRQS = 5 /* Fixed user ABI, IRQ indexes >=5 use */ ++ /* device specific cap to define content */ + }; + + /* +@@ -985,6 +1023,68 @@ struct vfio_device_feature { + */ + #define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) + ++/* ++ * Capability exposed by the DMA fault region ++ * @version: ABI version ++ */ ++#define VFIO_REGION_INFO_CAP_DMA_FAULT 6 ++ ++struct vfio_region_info_cap_fault { ++ struct vfio_info_cap_header header; ++ __u32 version; ++}; ++ ++/* ++ * Capability exposed by the DMA fault response region ++ * @version: ABI version ++ */ ++#define VFIO_REGION_INFO_CAP_DMA_FAULT_RESPONSE 7 ++ ++struct vfio_region_info_cap_fault_response { ++ struct vfio_info_cap_header header; ++ __u32 version; ++}; ++ ++/* ++ * DMA Fault Region Layout ++ * @tail: index relative to the 
start of the ring buffer at which the ++ * consumer finds the next item in the buffer ++ * @entry_size: fault ring buffer entry size in bytes ++ * @nb_entries: max capacity of the fault ring buffer ++ * @offset: ring buffer offset relative to the start of the region ++ * @head: index relative to the start of the ring buffer at which the ++ * producer (kernel) inserts items into the buffers ++ */ ++struct vfio_region_dma_fault { ++ /* Write-Only */ ++ __u32 tail; ++ /* Read-Only */ ++ __u32 entry_size; ++ __u32 nb_entries; ++ __u32 offset; ++ __u32 head; ++}; ++ ++/* ++ * DMA Fault Response Region Layout ++ * @head: index relative to the start of the ring buffer at which the ++ * producer (userspace) insert responses into the buffer ++ * @entry_size: fault ring buffer entry size in bytes ++ * @nb_entries: max capacity of the fault ring buffer ++ * @offset: ring buffer offset relative to the start of the region ++ * @tail: index relative to the start of the ring buffer at which the ++ * consumer (kernel) finds the next item in the buffer ++ */ ++struct vfio_region_dma_fault_response { ++ /* Write-Only */ ++ __u32 head; ++ /* Read-Only */ ++ __u32 entry_size; ++ __u32 nb_entries; ++ __u32 offset; ++ __u32 tail; ++}; ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** +@@ -1049,6 +1149,21 @@ struct vfio_iommu_type1_info_cap_migration { + __u64 max_dirty_bitmap_size; /* in bytes */ + }; + ++/* ++ * The DMA available capability allows to report the current number of ++ * simultaneously outstanding DMA mappings that are allowed. ++ * ++ * The structure below defines version 1 of this capability. ++ * ++ * avail: specifies the current number of outstanding DMA mappings allowed. 
++ */ ++#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 ++ ++struct vfio_iommu_type1_info_dma_avail { ++ struct vfio_info_cap_header header; ++ __u32 avail; ++}; ++ + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + + /** +@@ -1072,7 +1187,7 @@ struct vfio_iommu_type1_dma_map { + struct vfio_bitmap { + __u64 pgsize; /* page size for bitmap in bytes */ + __u64 size; /* in bytes */ +- __u64 *data; /* one bit per page */ ++ __u64 *data; /* one bit per page */ + }; + + /** +@@ -1188,6 +1303,134 @@ struct vfio_iommu_type1_dirty_bitmap_get { + + #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) + ++/* ++ * VFIO_IOMMU_BIND_PROCESS ++ * ++ * Allocate a PASID for a process address space, and use it to attach this ++ * process to all devices in the container. Devices can then tag their DMA ++ * traffic with the returned @pasid to perform transactions on the associated ++ * virtual address space. Mapping and unmapping buffers is performed by standard ++ * functions such as mmap and malloc. ++ * ++ * If flag is VFIO_IOMMU_BIND_PID, @pid contains the pid of a foreign process to ++ * bind. Otherwise the current task is bound. Given that the caller owns the ++ * device, setting this flag grants the caller read and write permissions on the ++ * entire address space of foreign process described by @pid. Therefore, ++ * permission to perform the bind operation on a foreign process is governed by ++ * the ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check. See man ptrace(2) ++ * for more information. ++ * ++ * On success, VFIO writes a Process Address Space ID (PASID) into @pasid. This ++ * ID is unique to a process and can be used on all devices in the container. ++ * ++ * On fork, the child inherits the device fd and can use the bonds setup by its ++ * parent. Consequently, the child has R/W access on the address spaces bound by ++ * its parent. After an execv, the device fd is closed and the child doesn't ++ * have access to the address space anymore. 
++ * ++ * To remove a bond between process and container, VFIO_IOMMU_UNBIND ioctl is ++ * issued with the same parameters. If a pid was specified in VFIO_IOMMU_BIND, ++ * it should also be present for VFIO_IOMMU_UNBIND. Otherwise unbind the current ++ * task from the container. ++ */ ++struct vfio_iommu_type1_bind_process { ++ __u32 flags; ++#define VFIO_IOMMU_BIND_PID (1 << 0) ++ __u32 pasid; ++ __s32 pid; ++}; ++ ++/* ++ * Only mode supported at the moment is VFIO_IOMMU_BIND_PROCESS, which takes ++ * vfio_iommu_type1_bind_process in data. ++ */ ++struct vfio_iommu_type1_bind { ++ __u32 argsz; ++ __u32 flags; ++#define VFIO_IOMMU_BIND_PROCESS (1 << 0) ++ __u8 data[]; ++}; ++ ++/* ++ * VFIO_IOMMU_BIND - _IOWR(VFIO_TYPE, VFIO_BASE + 22, struct vfio_iommu_bind) ++ * ++ * Manage address spaces of devices in this container. Initially a TYPE1 ++ * container can only have one address space, managed with ++ * VFIO_IOMMU_MAP/UNMAP_DMA. ++ * ++ * An IOMMU of type VFIO_TYPE1_NESTING_IOMMU can be managed by both MAP/UNMAP ++ * and BIND ioctls at the same time. MAP/UNMAP acts on the stage-2 (host) page ++ * tables, and BIND manages the stage-1 (guest) page tables. Other types of ++ * IOMMU may allow MAP/UNMAP and BIND to coexist, where MAP/UNMAP controls ++ * non-PASID traffic and BIND controls PASID traffic. But this depends on the ++ * underlying IOMMU architecture and isn't guaranteed. ++ * ++ * Availability of this feature depends on the device, its bus, the underlying ++ * IOMMU and the CPU architecture. ++ * ++ * returns: 0 on success, -errno on failure. ++ */ ++#define VFIO_IOMMU_BIND _IO(VFIO_TYPE, VFIO_BASE + 22) ++ ++/* ++ * VFIO_IOMMU_UNBIND - _IOWR(VFIO_TYPE, VFIO_BASE + 23, struct vfio_iommu_bind) ++ * ++ * Undo what was done by the corresponding VFIO_IOMMU_BIND ioctl. 
++ */ ++#define VFIO_IOMMU_UNBIND _IO(VFIO_TYPE, VFIO_BASE + 23) ++ ++/* ++ * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 18, ++ * struct vfio_iommu_type1_set_pasid_table) ++ * ++ * The SET operation passes a PASID table to the host while the ++ * UNSET operation detaches the one currently programmed. It is ++ * allowed to "SET" the table several times without unsetting as ++ * long as the table config does not stay IOMMU_PASID_CONFIG_TRANSLATE. ++ */ ++struct vfio_iommu_type1_set_pasid_table { ++ __u32 argsz; ++ __u32 flags; ++#define VFIO_PASID_TABLE_FLAG_SET (1 << 0) ++#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1) ++ struct iommu_pasid_table_config config; /* used on SET */ ++}; ++ ++#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 18) ++ ++/** ++ * VFIO_IOMMU_CACHE_INVALIDATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, ++ * struct vfio_iommu_type1_cache_invalidate) ++ * ++ * Propagate guest IOMMU cache invalidation to the host. ++ */ ++struct vfio_iommu_type1_cache_invalidate { ++ __u32 argsz; ++ __u32 flags; ++ struct iommu_cache_invalidate_info info; ++}; ++#define VFIO_IOMMU_CACHE_INVALIDATE _IO(VFIO_TYPE, VFIO_BASE + 19) ++ ++/** ++ * VFIO_IOMMU_SET_MSI_BINDING - _IOWR(VFIO_TYPE, VFIO_BASE + 20, ++ * struct vfio_iommu_type1_set_msi_binding) ++ * ++ * Pass a stage 1 MSI doorbell mapping to the host so that this ++ * latter can build a nested stage2 mapping. Or conversely tear ++ * down a previously bound stage 1 MSI binding. 
++ */ ++struct vfio_iommu_type1_set_msi_binding { ++ __u32 argsz; ++ __u32 flags; ++#define VFIO_IOMMU_BIND_MSI (1 << 0) ++#define VFIO_IOMMU_UNBIND_MSI (1 << 1) ++ __u64 iova; /* MSI guest IOVA */ ++ /* Fields below are used on BIND */ ++ __u64 gpa; /* MSI guest physical address */ ++ __u64 size; /* size of stage1 mapping (bytes) */ ++}; ++#define VFIO_IOMMU_SET_MSI_BINDING _IO(VFIO_TYPE, VFIO_BASE + 20) ++ + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ + + /* +-- +2.27.0 + diff --git a/vhost-Add-names-to-section-rounded-warning.patch b/vhost-Add-names-to-section-rounded-warning.patch new file mode 100644 index 0000000000000000000000000000000000000000..09c36eda5b8f7063d3543ec1adab3349bf87ba7b --- /dev/null +++ b/vhost-Add-names-to-section-rounded-warning.patch @@ -0,0 +1,37 @@ +From 437a9d2c7e48495ffc467808eece045579956c79 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 16 Jan 2020 20:24:13 +0000 +Subject: [PATCH] vhost: Add names to section rounded warning + +Add the memory region names to section rounding/alignment +warnings. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20200116202414.157959-2-dgilbert@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +--- + hw/virtio/vhost.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 9c16f0d107..ae61c33c15 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -591,9 +591,10 @@ static void vhost_region_add_section(struct vhost_dev *dev, + * match up in the same RAMBlock if they do. 
+ */ + if (mrs_gpa < prev_gpa_start) { +- error_report("%s:Section rounded to %"PRIx64 +- " prior to previous %"PRIx64, +- __func__, mrs_gpa, prev_gpa_start); ++ error_report("%s:Section '%s' rounded to %"PRIx64 ++ " prior to previous '%s' %"PRIx64, ++ __func__, section->mr->name, mrs_gpa, ++ prev_sec->mr->name, prev_gpa_start); + /* A way to cleanly fail here would be better */ + return; + } +-- +2.27.0 + diff --git a/vhost-user-Print-unexpected-slave-message-types.patch b/vhost-user-Print-unexpected-slave-message-types.patch new file mode 100644 index 0000000000000000000000000000000000000000..4287428e059d06ffaac516ed2c2aa83b4f5d4e98 --- /dev/null +++ b/vhost-user-Print-unexpected-slave-message-types.patch @@ -0,0 +1,35 @@ +From 6e084ff24ad73eb4f7541573c6097013f5b94959 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 7 Feb 2019 18:22:40 +0000 +Subject: [PATCH] vhost-user: Print unexpected slave message types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When we receive an unexpected message type on the slave fd, print +the type. + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/virtio/vhost-user.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 4ca5b2551e..f012774210 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -1054,7 +1054,7 @@ static void slave_read(void *opaque) + fd[0]); + break; + default: +- error_report("Received unexpected msg type."); ++ error_report("Received unexpected msg type: %d.", hdr.request); + ret = -EINVAL; + } + +-- +2.27.0 + diff --git a/vhost-user-blk-convert-to-new-virtio_delete_queue.patch b/vhost-user-blk-convert-to-new-virtio_delete_queue.patch new file mode 100644 index 0000000000000000000000000000000000000000..3f419966ec034fe4eecd8e6606b6e7dc611517e1 --- /dev/null +++ b/vhost-user-blk-convert-to-new-virtio_delete_queue.patch @@ -0,0 +1,99 @@ +From 30d20e1258722431198cd2a8298c85b7af2a0c1b Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Mon, 24 Feb 2020 12:13:36 +0800 +Subject: [PATCH 5/9] vhost-user-blk: convert to new virtio_delete_queue + +use the new virtio_delete_queue function to cleanup. + +Signed-off-by: Pan Nengyuan +Message-Id: <20200224041336.30790-3-pannengyuan@huawei.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: AlexChen +--- + hw/block/vhost-user-blk.c | 20 ++++++++++++-------- + include/hw/virtio/vhost-user-blk.h | 4 +++- + 2 files changed, 15 insertions(+), 9 deletions(-) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index dbc0a2e..146b927 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -303,7 +303,7 @@ static int vhost_user_blk_connect(DeviceState *dev) + s->connected = true; + + s->dev.nvqs = s->num_queues; +- s->dev.vqs = s->vqs; ++ s->dev.vqs = s->vhost_vqs; + s->dev.vq_index = 0; + s->dev.backend_features = 0; + +@@ -430,13 +430,15 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) + virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, + sizeof(struct virtio_blk_config)); + ++ s->virtqs = g_new(VirtQueue *, s->num_queues); + for (i = 0; i < s->num_queues; i++) { +- virtio_add_queue(vdev, s->queue_size, +- vhost_user_blk_handle_output); ++ s->virtqs[i] = virtio_add_queue(vdev, s->queue_size, ++ vhost_user_blk_handle_output); + } + + s->inflight = g_new0(struct vhost_inflight, 1); +- s->vqs = g_new(struct vhost_virtqueue, s->num_queues); ++ s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); ++ s->watch = 0; + s->connected = false; + + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, +@@ -467,11 +469,12 @@ reconnect: + return; + + virtio_err: +- g_free(s->vqs); ++ g_free(s->vhost_vqs); + g_free(s->inflight); + for (i = 0; i < s->num_queues; i++) { +- virtio_del_queue(vdev, i); ++ virtio_delete_queue(s->virtqs[i]); + } ++ g_free(s->virtqs); + virtio_cleanup(vdev); + vhost_user_cleanup(&s->vhost_user); + } +@@ -487,12 +490,13 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) + NULL, NULL, NULL, false); + vhost_dev_cleanup(&s->dev); + vhost_dev_free_inflight(s->inflight); +- g_free(s->vqs); ++ g_free(s->vhost_vqs); + g_free(s->inflight); + + for (i = 0; i < s->num_queues; i++) { +- virtio_del_queue(vdev, i); ++ 
virtio_delete_queue(s->virtqs[i]); + } ++ g_free(s->virtqs); + virtio_cleanup(vdev); + vhost_user_cleanup(&s->vhost_user); + } +diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h +index ad9b742..29375dd 100644 +--- a/include/hw/virtio/vhost-user-blk.h ++++ b/include/hw/virtio/vhost-user-blk.h +@@ -37,7 +37,9 @@ typedef struct VHostUserBlk { + struct vhost_dev dev; + struct vhost_inflight *inflight; + VhostUserState vhost_user; +- struct vhost_virtqueue *vqs; ++ struct vhost_virtqueue *vhost_vqs; ++ VirtQueue **virtqs; ++ guint watch; + bool connected; + } VHostUserBlk; + +-- +1.8.3.1 + diff --git a/vhost-user-blk-delay-vhost_user_blk_disconnect.patch b/vhost-user-blk-delay-vhost_user_blk_disconnect.patch new file mode 100644 index 0000000000000000000000000000000000000000..422e2a17b028d83690cc620a57829260c76aab52 --- /dev/null +++ b/vhost-user-blk-delay-vhost_user_blk_disconnect.patch @@ -0,0 +1,90 @@ +From 632a841b6ba547906b475250f5c2cb46774ab4af Mon Sep 17 00:00:00 2001 +From: Dima Stepanov +Date: Thu, 28 May 2020 12:11:19 +0300 +Subject: [PATCH 14/14] vhost-user-blk: delay vhost_user_blk_disconnect + +A socket write during vhost-user communication may trigger a disconnect +event, calling vhost_user_blk_disconnect() and clearing all the +vhost_dev structures holding data that vhost-user functions expect to +remain valid to roll back initialization correctly. Delay the cleanup to +keep vhost_dev structure valid. +There are two possible states to handle: +1. RUN_STATE_PRELAUNCH: skip bh oneshot call and perform disconnect in +the caller routine. +2. RUN_STATE_RUNNING: delay by using bh + +BH changes are based on the similar changes for the vhost-user-net +device: + commit e7c83a885f865128ae3cf1946f8cb538b63cbfba + "vhost-user: delay vhost_user_stop" + +Signed-off-by: Dima Stepanov +Message-Id: <69b73b94dcd066065595266c852810e0863a0895.1590396396.git.dimastep@yandex-team.ru> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Li Feng +Reviewed-by: Raphael Norwitz +Signed-off-by: Peng Liang +--- + hw/block/vhost-user-blk.c | 38 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 37 insertions(+), 1 deletion(-) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index dc66f8a5febd..6b719d1d80e1 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -346,6 +346,19 @@ static void vhost_user_blk_disconnect(DeviceState *dev) + vhost_dev_cleanup(&s->dev); + } + ++static void vhost_user_blk_event(void *opaque, int event); ++ ++static void vhost_user_blk_chr_closed_bh(void *opaque) ++{ ++ DeviceState *dev = opaque; ++ VirtIODevice *vdev = VIRTIO_DEVICE(dev); ++ VHostUserBlk *s = VHOST_USER_BLK(vdev); ++ ++ vhost_user_blk_disconnect(dev); ++ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, ++ NULL, opaque, NULL, true); ++} ++ + static void vhost_user_blk_event(void *opaque, int event) + { + DeviceState *dev = opaque; +@@ -360,7 +373,30 @@ static void vhost_user_blk_event(void *opaque, int event) + } + break; + case CHR_EVENT_CLOSED: +- vhost_user_blk_disconnect(dev); ++ /* ++ * A close event may happen during a read/write, but vhost ++ * code assumes the vhost_dev remains setup, so delay the ++ * stop & clear. There are two possible paths to hit this ++ * disconnect event: ++ * 1. When VM is in the RUN_STATE_PRELAUNCH state. The ++ * vhost_user_blk_device_realize() is a caller. ++ * 2. In the main loop phase after VM start. ++ * ++ * For p2 the disconnect event will be delayed. We can't ++ * do the same for p1, because we are not running the loop ++ * at this moment. So just skip this step and perform ++ * disconnect in the caller function. ++ * ++ * TODO: maybe it is a good idea to make the same fix ++ * for other vhost-user devices.
++ */ ++ if (runstate_is_running()) { ++ AioContext *ctx = qemu_get_current_aio_context(); ++ ++ qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL, ++ NULL, NULL, false); ++ aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque); ++ } + break; + } + } +-- +2.26.2 + diff --git a/vhost-user-blk-delete-virtioqueues-in-unrealize-to-f.patch b/vhost-user-blk-delete-virtioqueues-in-unrealize-to-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..e57f5b59c972cf4c769d7b87cc2a654b72eb3b64 --- /dev/null +++ b/vhost-user-blk-delete-virtioqueues-in-unrealize-to-f.patch @@ -0,0 +1,69 @@ +From d8febdc4940d719dba77a17a10a8d36ad08305ab Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Mon, 24 Feb 2020 12:13:35 +0800 +Subject: [PATCH 4/9] vhost-user-blk: delete virtioqueues in unrealize to fix + memleaks + +virtio queues forgot to delete in unrealize, and also error path in +realize, this patch fix these memleaks, the leak stack is as follow: + +Direct leak of 114688 byte(s) in 16 object(s) allocated from: + #0 0x7f24024fdbf0 in calloc (/lib64/libasan.so.3+0xcabf0) + #1 0x7f2401642015 in g_malloc0 (/lib64/libglib-2.0.so.0+0x50015) + #2 0x55ad175a6447 in virtio_add_queue /mnt/sdb/qemu/hw/virtio/virtio.c:2327 + #3 0x55ad17570cf9 in vhost_user_blk_device_realize /mnt/sdb/qemu/hw/block/vhost-user-blk.c:419 + #4 0x55ad175a3707 in virtio_device_realize /mnt/sdb/qemu/hw/virtio/virtio.c:3509 + #5 0x55ad176ad0d1 in device_set_realized /mnt/sdb/qemu/hw/core/qdev.c:876 + #6 0x55ad1781ff9d in property_set_bool /mnt/sdb/qemu/qom/object.c:2080 + #7 0x55ad178245ae in object_property_set_qobject /mnt/sdb/qemu/qom/qom-qobject.c:26 + #8 0x55ad17821eb4 in object_property_set_bool /mnt/sdb/qemu/qom/object.c:1338 + #9 0x55ad177aeed7 in virtio_pci_realize /mnt/sdb/qemu/hw/virtio/virtio-pci.c:1801 + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Reviewed-by: Stefan Hajnoczi +Message-Id: <20200224041336.30790-2-pannengyuan@huawei.com> +Reviewed-by:
Stefan Hajnoczi +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: AlexChen +--- + hw/block/vhost-user-blk.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c +index 6b719d1..dbc0a2e 100644 +--- a/hw/block/vhost-user-blk.c ++++ b/hw/block/vhost-user-blk.c +@@ -469,6 +469,9 @@ reconnect: + virtio_err: + g_free(s->vqs); + g_free(s->inflight); ++ for (i = 0; i < s->num_queues; i++) { ++ virtio_del_queue(vdev, i); ++ } + virtio_cleanup(vdev); + vhost_user_cleanup(&s->vhost_user); + } +@@ -477,6 +480,7 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(dev); ++ int i; + + virtio_set_status(vdev, 0); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, +@@ -485,6 +489,10 @@ static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) + vhost_dev_free_inflight(s->inflight); + g_free(s->vqs); + g_free(s->inflight); ++ ++ for (i = 0; i < s->num_queues; i++) { ++ virtio_del_queue(vdev, i); ++ } + virtio_cleanup(vdev); + vhost_user_cleanup(&s->vhost_user); + } +-- +1.8.3.1 + diff --git a/vhost-user-gpu-fix-OOB-write-in-virgl_cmd_get_capset.patch b/vhost-user-gpu-fix-OOB-write-in-virgl_cmd_get_capset.patch new file mode 100644 index 0000000000000000000000000000000000000000..11f9ce807bca6c991b77e007d078a73289d27e2e --- /dev/null +++ b/vhost-user-gpu-fix-OOB-write-in-virgl_cmd_get_capset.patch @@ -0,0 +1,51 @@ +From acb9f3aadde7222eacf95b2d70204dd6f8351ed7 Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Tue, 15 Jun 2021 10:14:06 +0800 +Subject: [PATCH] vhost-user-gpu: fix OOB write in 'virgl_cmd_get_capset' + (CVE-2021-3546) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2021-3544 + +If 'virgl_cmd_get_capset' set 'max_size' to 0, +the 'virgl_renderer_fill_caps' will write the data after the 
'resp'. +This patch avoid this by checking the returned 'max_size'. + +virtio-gpu fix: abd7f08b + + ("display: virtio-gpu-3d: check +virgl capabilities max_size") + +Fixes: CVE-2021-3546 +Reported-by: default avatarLi Qiang +Reviewed-by: default avatarPrasad J Pandit +Signed-off-by: default avatarLi Qiang +Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau +Message-Id: <20210516030403.107723-8-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann + +Signed-off-by: Jiajie Li +--- + contrib/vhost-user-gpu/virgl.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c +index 44e79ab82a..ad2834902b 100644 +--- a/contrib/vhost-user-gpu/virgl.c ++++ b/contrib/vhost-user-gpu/virgl.c +@@ -173,6 +173,10 @@ virgl_cmd_get_capset(VuGpu *g, + + virgl_renderer_get_cap_set(gc.capset_id, &max_ver, + &max_size); ++ if (!max_size) { ++ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; ++ return; ++ } + resp = g_malloc0(sizeof(*resp) + max_size); + + resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET; +-- +2.27.0 + diff --git a/vhost-user-gpu-fix-memory-disclosure-in-virgl_cmd_ge.patch b/vhost-user-gpu-fix-memory-disclosure-in-virgl_cmd_ge.patch new file mode 100644 index 0000000000000000000000000000000000000000..46353183baae4d3465048614bb198988597901a7 --- /dev/null +++ b/vhost-user-gpu-fix-memory-disclosure-in-virgl_cmd_ge.patch @@ -0,0 +1,44 @@ +From 511cac8cbc60fafdae2589d674b7aeab15388eef Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Tue, 15 Jun 2021 10:11:17 +0800 +Subject: [PATCH] vhost-user-gpu: fix memory disclosure in + virgl_cmd_get_capset_info (CVE-2021-3545) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2021-3544 + +Otherwise some of the 'resp' will be leaked to guest. 
+ +Fixes: CVE-2021-3545 +Reported-by: default avatarLi Qiang +virtio-gpu fix: 42a8dadc + + ("virtio-gpu: fix information leak +in getting capset info dispatch") +Signed-off-by: default avatarLi Qiang +Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau +Message-Id: <20210516030403.107723-2-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann + +Signed-off-by: Jiajie Li +--- + contrib/vhost-user-gpu/virgl.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c +index 79556df094..44e79ab82a 100644 +--- a/contrib/vhost-user-gpu/virgl.c ++++ b/contrib/vhost-user-gpu/virgl.c +@@ -131,6 +131,7 @@ virgl_cmd_get_capset_info(VuGpu *g, + + VUGPU_FILL_CMD(info); + ++ memset(&resp, 0, sizeof(resp)); + if (info.capset_index == 0) { + resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; + virgl_renderer_get_cap_set(resp.capset_id, +-- +2.27.0 + diff --git a/vhost-user-gpu-fix-memory-leak-in-vg_resource_attach.patch b/vhost-user-gpu-fix-memory-leak-in-vg_resource_attach.patch new file mode 100644 index 0000000000000000000000000000000000000000..7c44ec4578db09283d0a6fa67c85298679472f4d --- /dev/null +++ b/vhost-user-gpu-fix-memory-leak-in-vg_resource_attach.patch @@ -0,0 +1,49 @@ +From b9f6004899adb8e501e1b9ce1cb0976a2268ad60 Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Tue, 15 Jun 2021 09:56:42 +0800 +Subject: [PATCH] vhost-user-gpu: fix memory leak in vg_resource_attach_backing + (CVE-2021-3544) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2021-3544 + +Check whether the 'res' has already been attach_backing to avoid +memory leak. 
+ +Fixes: CVE-2021-3544 +Reported-by: default avatarLi Qiang +virtio-gpu fix: 204f01b3 + + ("virtio-gpu: fix memory leak +in resource attach backing") +Signed-off-by: default avatarLi Qiang +Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau +Message-Id: <20210516030403.107723-4-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann + +Signed-off-by: Jiajie Li +--- + contrib/vhost-user-gpu/main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c +index f69af7d17f..4f087d6000 100644 +--- a/contrib/vhost-user-gpu/main.c ++++ b/contrib/vhost-user-gpu/main.c +@@ -468,6 +468,11 @@ vg_resource_attach_backing(VuGpu *g, + return; + } + ++ if (res->iov) { ++ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; ++ return; ++ } ++ + ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov); + if (ret != 0) { + cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; +-- +2.27.0 + diff --git a/vhost-user-gpu-fix-memory-leak-in-virgl_cmd_resource.patch b/vhost-user-gpu-fix-memory-leak-in-virgl_cmd_resource.patch new file mode 100644 index 0000000000000000000000000000000000000000..c0dccd7a33a11fbcc7e14ed605f8eeff080e3c9a --- /dev/null +++ b/vhost-user-gpu-fix-memory-leak-in-virgl_cmd_resource.patch @@ -0,0 +1,57 @@ +From 5bdbe19681e151318b749cb6b2443626bf54b82e Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Tue, 15 Jun 2021 10:05:40 +0800 +Subject: [PATCH] vhost-user-gpu: fix memory leak in 'virgl_cmd_resource_unref' + (CVE-2021-3544) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2021-3544 + +The 'res->iov' will be leaked if the guest trigger following sequences: + + virgl_cmd_create_resource_2d + virgl_resource_attach_backing + virgl_cmd_resource_unref + +This patch fixes this. 
+ +Fixes: CVE-2021-3544 +Reported-by: default avatarLi Qiang +virtio-gpu fix: 5e8e3c4c + + ("virtio-gpu: fix resource leak +in virgl_cmd_resource_unref" +Signed-off-by: default avatarLi Qiang +Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau +Message-Id: <20210516030403.107723-6-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann + +Signed-off-by: Jiajie Li +--- + contrib/vhost-user-gpu/virgl.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c +index 43413e29df..4b8b536edf 100644 +--- a/contrib/vhost-user-gpu/virgl.c ++++ b/contrib/vhost-user-gpu/virgl.c +@@ -105,8 +105,14 @@ virgl_cmd_resource_unref(VuGpu *g, + struct virtio_gpu_ctrl_command *cmd) + { + struct virtio_gpu_resource_unref unref; ++ struct iovec *res_iovs = NULL; ++ int num_iovs = 0; + + VUGPU_FILL_CMD(unref); ++ virgl_renderer_resource_detach_iov(unref.resource_id, ++ &res_iovs, ++ &num_iovs); ++ g_free(res_iovs); + + virgl_renderer_resource_unref(unref.resource_id); + } +-- +2.27.0 + diff --git a/vhost-user-gpu-fix-memory-leak-in-virgl_resource_att.patch b/vhost-user-gpu-fix-memory-leak-in-virgl_resource_att.patch new file mode 100644 index 0000000000000000000000000000000000000000..986a49f81fa988ed176c541265571ca01d5c5c85 --- /dev/null +++ b/vhost-user-gpu-fix-memory-leak-in-virgl_resource_att.patch @@ -0,0 +1,50 @@ +From 6348348ee6a76c28159c64d6392fb6ba5a0b4374 Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Tue, 15 Jun 2021 10:09:13 +0800 +Subject: [PATCH] vhost-user-gpu: fix memory leak in + 'virgl_resource_attach_backing' (CVE-2021-3544) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2021-3544 + +If 'virgl_renderer_resource_attach_iov' failed, the 'res_iovs' will +be leaked. 
+ +Fixes: CVE-2021-3544 +Reported-by: default avatarLi Qiang +virtio-gpu fix: 33243031 + + ("virtio-gpu-3d: fix memory leak +in resource attach backing") +Signed-off-by: default avatarLi Qiang +Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau +Message-Id: <20210516030403.107723-7-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann + +Signed-off-by: Jiajie Li +--- + contrib/vhost-user-gpu/virgl.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c +index 4b8b536edf..79556df094 100644 +--- a/contrib/vhost-user-gpu/virgl.c ++++ b/contrib/vhost-user-gpu/virgl.c +@@ -282,8 +282,11 @@ virgl_resource_attach_backing(VuGpu *g, + return; + } + +- virgl_renderer_resource_attach_iov(att_rb.resource_id, ++ ret = virgl_renderer_resource_attach_iov(att_rb.resource_id, + res_iovs, att_rb.nr_entries); ++ if (ret != 0) { ++ g_free(res_iovs); ++ } + } + + static void +-- +2.27.0 + diff --git a/vhost-user-gpu-fix-memory-leak-while-calling-vg_reso.patch b/vhost-user-gpu-fix-memory-leak-while-calling-vg_reso.patch new file mode 100644 index 0000000000000000000000000000000000000000..e67cd9a6f4220e628237a6cee4eeb5f755c67196 --- /dev/null +++ b/vhost-user-gpu-fix-memory-leak-while-calling-vg_reso.patch @@ -0,0 +1,51 @@ +From c276538416e9238e352d0f720db57ea1020e555f Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Tue, 15 Jun 2021 10:02:08 +0800 +Subject: [PATCH] vhost-user-gpu: fix memory leak while calling + 'vg_resource_unref' (CVE-2021-3544) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2021-3544 + +If the guest trigger following sequences, the attach_backing will be leaked: + + vg_resource_create_2d + vg_resource_attach_backing + vg_resource_unref + +This patch fix this by freeing 'res->iov' in vg_resource_destroy. 
+ +Fixes: CVE-2021-3544 +Reported-by: default avatarLi Qiang +virtio-gpu fix: 5e8e3c4c + + ("virtio-gpu: fix resource leak +in virgl_cmd_resource_unref") +Reviewed-by: default avatarPrasad J Pandit +Signed-off-by: default avatarLi Qiang +Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau +Message-Id: <20210516030403.107723-5-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann + +Signed-off-by: Jiajie Li +--- + contrib/vhost-user-gpu/main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c +index 4f087d6000..43d9851800 100644 +--- a/contrib/vhost-user-gpu/main.c ++++ b/contrib/vhost-user-gpu/main.c +@@ -379,6 +379,7 @@ vg_resource_destroy(VuGpu *g, + } + + vugbm_buffer_destroy(&res->buffer); ++ g_free(res->iov); + pixman_image_unref(res->image); + QTAILQ_REMOVE(&g->reslist, res, next); + g_free(res); +-- +2.27.0 + diff --git a/vhost-user-gpu-fix-resource-leak-in-vg_resource_crea.patch b/vhost-user-gpu-fix-resource-leak-in-vg_resource_crea.patch new file mode 100644 index 0000000000000000000000000000000000000000..8d565004e122e2c0308162fafb5ce3294a69f873 --- /dev/null +++ b/vhost-user-gpu-fix-resource-leak-in-vg_resource_crea.patch @@ -0,0 +1,41 @@ +From 58e7327879e89700630ca766974a18f9ac55897c Mon Sep 17 00:00:00 2001 +From: Li Qiang +Date: Tue, 15 Jun 2021 09:53:22 +0800 +Subject: [PATCH] vhost-user-gpu: fix resource leak in 'vg_resource_create_2d' + (CVE-2021-3544) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fix CVE-2021-3544 + +Call 'vugbm_buffer_destroy' in error path to avoid resource leak. 
+ +Fixes: CVE-2021-3544 +Reported-by: default avatarLi Qiang +Reviewed-by: default avatarPrasad J Pandit +Signed-off-by: default avatarLi Qiang +Reviewed-by: Marc-André Lureau's avatarMarc-André Lureau +Message-Id: <20210516030403.107723-3-liq3ea@163.com> +Signed-off-by: Gerd Hoffmann's avatarGerd Hoffmann + +Signed-off-by: Jiajie Li +--- + contrib/vhost-user-gpu/main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/contrib/vhost-user-gpu/main.c b/contrib/vhost-user-gpu/main.c +index b45d2019b4..f69af7d17f 100644 +--- a/contrib/vhost-user-gpu/main.c ++++ b/contrib/vhost-user-gpu/main.c +@@ -328,6 +328,7 @@ vg_resource_create_2d(VuGpu *g, + g_critical("%s: resource creation failed %d %d %d", + __func__, c2d.resource_id, c2d.width, c2d.height); ++ vugbm_buffer_destroy(&res->buffer); + g_free(res); + cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; + return; + } +-- +2.27.0 + diff --git a/vhost-user-save-features-if-the-char-dev-is-closed.patch b/vhost-user-save-features-if-the-char-dev-is-closed.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a0d04f4d7d98fd97e84bfba35f99d4871605b37 --- /dev/null +++ b/vhost-user-save-features-if-the-char-dev-is-closed.patch @@ -0,0 +1,42 @@ +From 7b404cae7fa2850d476c29258f03b8e77a5b4bd0 Mon Sep 17 00:00:00 2001 +From: Adrian Moreno +Date: Tue, 24 Sep 2019 18:20:44 +0200 +Subject: [PATCH] vhost-user: save features if the char dev is closed + +That way the state can be correctly restored when the device is opened +again. This might happen if the backend is restarted. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1738768 +Reported-by: Pei Zhang +Fixes: 6ab79a20af3a ("do not call vhost_net_cleanup() on running net from char user event") +Cc: ddstreet@canonical.com +Cc: Michael S. Tsirkin +Cc: qemu-stable@nongnu.org +Signed-off-by: Adrian Moreno +Message-Id: <20190924162044.11414-1-amorenoz@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S.
Tsirkin +(cherry picked from commit c6beefd674fff8d41b90365dfccad32e53a5abcb) +Signed-off-by: Michael Roth +--- + net/vhost-user.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 51921de443..014199d600 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -235,6 +235,10 @@ static void chr_closed_bh(void *opaque) + + s = DO_UPCAST(NetVhostUserState, nc, ncs[0]); + ++ if (s->vhost_net) { ++ s->acked_features = vhost_net_get_acked_features(s->vhost_net); ++ } ++ + qmp_set_link(name, false, &err); + + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, +-- +2.23.0 diff --git a/vhost-user-scsi-prevent-using-uninitialized-vqs.patch b/vhost-user-scsi-prevent-using-uninitialized-vqs.patch index 9c4f92316d4da5e9e53ddc17e9915427e3cd9c36..d1bf2a087bf4fcbedf2a8c0fbb8b62188737d1e7 100644 --- a/vhost-user-scsi-prevent-using-uninitialized-vqs.patch +++ b/vhost-user-scsi-prevent-using-uninitialized-vqs.patch @@ -1,7 +1,7 @@ -From 19d56f560879081de411f359417eaaa2998c9e3a Mon Sep 17 00:00:00 2001 +From 4d8f2885b3f1219c3df2cf1a00dc0c55b23ee715 Mon Sep 17 00:00:00 2001 From: Raphael Norwitz -Date: Tue, 11 Jun 2019 17:35:17 -0700 -Subject: [PATCH 5/5] vhost-user-scsi: prevent using uninitialized vqs +Date: Tue, 14 Apr 2020 21:39:05 +0800 +Subject: [PATCH] vhost-user-scsi: prevent using uninitialized vqs Of the 3 virtqueues, seabios only sets cmd, leaving ctrl and event without a physical address. 
This can cause @@ -26,18 +26,17 @@ Signed-off-by: Stefan Hajnoczi 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c -index 8b1e687..241631f 100644 +index fcee67d5..affc2431 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c -@@ -90,7 +90,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) +@@ -91,7 +91,7 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) } - + vsc->dev.nvqs = 2 + vs->conf.num_queues; - vsc->dev.vqs = g_new(struct vhost_virtqueue, vsc->dev.nvqs); + vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs); vsc->dev.vq_index = 0; vsc->dev.backend_features = 0; - --- -1.8.3.1 - + vqs = vsc->dev.vqs; +-- +2.23.0 diff --git a/virtio-add-ability-to-delete-vq-through-a-pointer.patch b/virtio-add-ability-to-delete-vq-through-a-pointer.patch new file mode 100644 index 0000000000000000000000000000000000000000..e0989895e40fe4e0d3816966a24075d74b8797a0 --- /dev/null +++ b/virtio-add-ability-to-delete-vq-through-a-pointer.patch @@ -0,0 +1,61 @@ +From 98ae454efe48b2a465dfe9bc3c412b6375f1fbfc Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Mon, 9 Dec 2019 11:46:13 -0500 +Subject: [PATCH 1/9] virtio: add ability to delete vq through a pointer + +Devices tend to maintain vq pointers, allow deleting them through a vq pointer. + +Signed-off-by: Michael S.
Tsirkin +Reviewed-by: David Hildenbrand +Signed-off-by: AlexChen +--- + hw/virtio/virtio.c | 13 +++++++++---- + include/hw/virtio/virtio.h | 2 ++ + 2 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 79c2dcf..3d027d3 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -1636,16 +1636,21 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + return &vdev->vq[i]; + } + ++void virtio_delete_queue(VirtQueue *vq) ++{ ++ vq->vring.num = 0; ++ vq->vring.num_default = 0; ++ vq->handle_output = NULL; ++ vq->handle_aio_output = NULL; ++} ++ + void virtio_del_queue(VirtIODevice *vdev, int n) + { + if (n < 0 || n >= VIRTIO_QUEUE_MAX) { + abort(); + } + +- vdev->vq[n].vring.num = 0; +- vdev->vq[n].vring.num_default = 0; +- vdev->vq[n].handle_output = NULL; +- vdev->vq[n].handle_aio_output = NULL; ++ virtio_delete_queue(&vdev->vq[n]); + } + + static void virtio_set_isr(VirtIODevice *vdev, int value) +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index f9f6237..ca2fbae 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -187,6 +187,8 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + + void virtio_del_queue(VirtIODevice *vdev, int n); + ++void virtio_delete_queue(VirtQueue *vq); ++ + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); + void virtqueue_flush(VirtQueue *vq, unsigned int count); +-- +1.8.3.1 + diff --git a/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch b/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch new file mode 100644 index 0000000000000000000000000000000000000000..3c2a3f2a424bd30ff95b8fcb53ae23c324d43153 --- /dev/null +++ b/virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch @@ -0,0 +1,80 @@ +From 01be50603be4f17af4318a7a3fe58dcc6dab1b31 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Fri, 16 Aug 2019 19:15:03 
+0200 +Subject: [PATCH] virtio-blk: Cancel the pending BH when the dataplane is reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When 'system_reset' is called, the main loop clear the memory +region cache before the BH has a chance to execute. Later when +the deferred function is called, some assumptions that were +made when scheduling them are no longer true when they actually +execute. + +This is what happens using a virtio-blk device (fresh RHEL7.8 install): + + $ (sleep 12.3; echo system_reset; sleep 12.3; echo system_reset; sleep 1; echo q) \ + | qemu-system-x86_64 -m 4G -smp 8 -boot menu=on \ + -device virtio-blk-pci,id=image1,drive=drive_image1 \ + -drive file=/var/lib/libvirt/images/rhel78.qcow2,if=none,id=drive_image1,format=qcow2,cache=none \ + -device virtio-net-pci,netdev=net0,id=nic0,mac=52:54:00:c4:e7:84 \ + -netdev tap,id=net0,script=/bin/true,downscript=/bin/true,vhost=on \ + -monitor stdio -serial null -nographic + (qemu) system_reset + (qemu) system_reset + (qemu) qemu-system-x86_64: hw/virtio/virtio.c:225: vring_get_region_caches: Assertion `caches != NULL' failed. 
+ Aborted + + (gdb) bt + Thread 1 (Thread 0x7f109c17b680 (LWP 10939)): + #0 0x00005604083296d1 in vring_get_region_caches (vq=0x56040a24bdd0) at hw/virtio/virtio.c:227 + #1 0x000056040832972b in vring_avail_flags (vq=0x56040a24bdd0) at hw/virtio/virtio.c:235 + #2 0x000056040832d13d in virtio_should_notify (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1648 + #3 0x000056040832d1f8 in virtio_notify_irqfd (vdev=0x56040a240630, vq=0x56040a24bdd0) at hw/virtio/virtio.c:1662 + #4 0x00005604082d213d in notify_guest_bh (opaque=0x56040a243ec0) at hw/block/dataplane/virtio-blk.c:75 + #5 0x000056040883dc35 in aio_bh_call (bh=0x56040a243f10) at util/async.c:90 + #6 0x000056040883dccd in aio_bh_poll (ctx=0x560409161980) at util/async.c:118 + #7 0x0000560408842af7 in aio_dispatch (ctx=0x560409161980) at util/aio-posix.c:460 + #8 0x000056040883e068 in aio_ctx_dispatch (source=0x560409161980, callback=0x0, user_data=0x0) at util/async.c:261 + #9 0x00007f10a8fca06d in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 + #10 0x0000560408841445 in glib_pollfds_poll () at util/main-loop.c:215 + #11 0x00005604088414bf in os_host_main_loop_wait (timeout=0) at util/main-loop.c:238 + #12 0x00005604088415c4 in main_loop_wait (nonblocking=0) at util/main-loop.c:514 + #13 0x0000560408416b1e in main_loop () at vl.c:1923 + #14 0x000056040841e0e8 in main (argc=20, argv=0x7ffc2c3f9c58, envp=0x7ffc2c3f9d00) at vl.c:4578 + +Fix this by cancelling the BH when the virtio dataplane is stopped. + +[This is version of the patch was modified as discussed with Philippe on +the mailing list thread. 
+--Stefan] + +Reported-by: Yihuang Yu +Suggested-by: Stefan Hajnoczi +Fixes: https://bugs.launchpad.net/qemu/+bug/1839428 +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20190816171503.24761-1-philmd@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit ebb6ff25cd888a52a64a9adc3692541c6d1d9a42) +Signed-off-by: Michael Roth +--- + hw/block/dataplane/virtio-blk.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 158c78f852..5fea76df85 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -297,6 +297,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); + } + ++ qemu_bh_cancel(s->bh); ++ notify_guest_bh(s); /* final chance to notify guest */ ++ + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, nvqs, false); + +-- +2.23.0 diff --git a/virtio-blk-On-restart-process-queued-requests-in-the.patch b/virtio-blk-On-restart-process-queued-requests-in-the.patch new file mode 100644 index 0000000000000000000000000000000000000000..5edb6fd96f56ff66757af73d40bb90db142bbdc9 --- /dev/null +++ b/virtio-blk-On-restart-process-queued-requests-in-the.patch @@ -0,0 +1,191 @@ +From 882897127955fbede44c73703ec297c8ae89775d Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Thu, 21 Jan 2021 15:46:52 +0800 +Subject: [PATCH] virtio-blk: On restart, process queued requests in the proper + context + +On restart, we were scheduling a BH to process queued requests, which +would run before starting up the data plane, leading to those requests +being assigned and started on coroutines on the main context. + +This could cause requests to be wrongly processed in parallel from +different threads (the main thread and the iothread managing the data +plane), potentially leading to multiple issues. 
+ +For example, stopping and resuming a VM multiple times while the guest +is generating I/O on a virtio_blk device can trigger a crash with a +stack tracing looking like this one: + +<------> + Thread 2 (Thread 0x7ff736765700 (LWP 1062503)): + #0 0x00005567a13b99d6 in iov_memset + (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) + at util/iov.c:69 + #1 0x00005567a13bab73 in qemu_iovec_memset + (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 + #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 + #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 + #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 + #5 0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420) + at block/linux-aio.c:236 + #6 0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267 + #7 0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) + at util/aio-posix.c:520 + #8 0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8) + at util/aio-posix.c:562 + #9 0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) + at util/aio-posix.c:597 + #10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639 + #11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75 + #12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519 + #13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0 + #14 0x00007ff73ec10e83 in clone () at /lib64/libc.so.6 + + Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)): + #0 0x00005567a13b99d6 in iov_memset + (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) + at 
util/iov.c:69 + #1 0x00005567a13bab73 in qemu_iovec_memset + (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 + #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 + #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 + #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 + #5 0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2) + at block/linux-aio.c:375 + #6 0x00005567a12f4af2 in laio_co_submit + (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2) + at block/linux-aio.c:394 + #7 0x00005567a12f1803 in raw_co_prw + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2) + at block/file-posix.c:1892 + #8 0x00005567a12f1941 in raw_co_pwritev + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0) + at block/file-posix.c:1925 + #9 0x00005567a12fe3e1 in bdrv_driver_pwritev + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0) + at block/io.c:1183 + #10 0x00005567a1300340 in bdrv_aligned_pwritev + (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980 + #11 0x00005567a1300b29 in bdrv_co_pwritev_part + (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) + at block/io.c:2137 + #12 0x00005567a12baba1 in qcow2_co_pwritev_task + (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at block/qcow2.c:2444 + #13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475 + #14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45 + #15 0x00005567a13cf00c in 
coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115 + #16 0x00007ff73eb622e0 in __start_context () at /lib64/libc.so.6 + #17 0x00007ff6626f1350 in () + #18 0x0000000000000000 in () +<------> + +This is also known to cause crashes with this message (assertion +failed): + + aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765 +Signed-off-by: Sergio Lopez +Message-Id: <20200603093240.40489-3-slp(a)redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/block/dataplane/virtio-blk.c | 8 ++++++++ + hw/block/virtio-blk.c | 18 ++++++++++++------ + include/hw/virtio/virtio-blk.h | 2 +- + 3 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 5fea76df85..4476f97960 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -219,6 +219,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + goto fail_guest_notifiers; + } + ++ /* Process queued requests before the ones in vring */ ++ virtio_blk_process_queued_requests(vblk, false); ++ + /* Kick right away to begin processing requests already in vring */ + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); +@@ -238,6 +241,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + return 0; + + fail_guest_notifiers: ++ /* ++ * If we failed to set up the guest notifiers queued requests will be ++ * processed on the main context. 
++ */ ++ virtio_blk_process_queued_requests(vblk, false); + vblk->dataplane_disabled = true; + s->starting = false; + vblk->dataplane_started = true; +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index cee2c673a5..ddf525b9d7 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -809,7 +809,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + virtio_blk_handle_output_do(s, vq); + } + +-void virtio_blk_process_queued_requests(VirtIOBlock *s) ++void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) + { + VirtIOBlockReq *req = s->rq; + MultiReqBuffer mrb = {}; +@@ -837,7 +837,9 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s) + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->blk, &mrb); + } +- blk_dec_in_flight(s->conf.conf.blk); ++ if (is_bh) { ++ blk_dec_in_flight(s->conf.conf.blk); ++ } + aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + +@@ -848,21 +850,25 @@ static void virtio_blk_dma_restart_bh(void *opaque) + qemu_bh_delete(s->bh); + s->bh = NULL; + +- virtio_blk_process_queued_requests(s); ++ virtio_blk_process_queued_requests(s, true); + } + + static void virtio_blk_dma_restart_cb(void *opaque, int running, + RunState state) + { + VirtIOBlock *s = opaque; ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); ++ VirtioBusState *bus = VIRTIO_BUS(qbus); + + if (!running) { + return; + } + +- if (!s->bh) { +- /* FIXME The data plane is not started yet, so these requests are +- * processed in the main thread. */ ++ /* ++ * If ioeventfd is enabled, don't schedule the BH here as queued ++ * requests will be processed while starting the data plane. 
++ */ ++ if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) { + s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), + virtio_blk_dma_restart_bh, s); + blk_inc_in_flight(s->conf.conf.blk); +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index cf8eea2f58..e77f0db3b0 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -84,6 +84,6 @@ typedef struct MultiReqBuffer { + } MultiReqBuffer; + + bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); +-void virtio_blk_process_queued_requests(VirtIOBlock *s); ++void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); + + #endif +-- +2.27.0 + diff --git a/virtio-blk-Refactor-the-code-that-processes-queued-r.patch b/virtio-blk-Refactor-the-code-that-processes-queued-r.patch new file mode 100644 index 0000000000000000000000000000000000000000..2848fbd0aae20aceae61a1ad4b07a47d147659b4 --- /dev/null +++ b/virtio-blk-Refactor-the-code-that-processes-queued-r.patch @@ -0,0 +1,70 @@ +From 21c5ffb363930dfe6213bb677c5811fede3bcee2 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Thu, 21 Jan 2021 15:46:51 +0800 +Subject: [PATCH] virtio-blk: Refactor the code that processes queued requests + +Move the code that processes queued requests from +virtio_blk_dma_restart_bh() to its own, non-static, function. This +will allow us to call it from the virtio_blk_data_plane_start() in a +future patch. 
+ +Signed-off-by: Sergio Lopez +Message-Id: <20200603093240.40489-2-slp(a)redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 16 +++++++++++----- + include/hw/virtio/virtio-blk.h | 1 + + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 703ed4c93b..cee2c673a5 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -809,15 +809,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + virtio_blk_handle_output_do(s, vq); + } + +-static void virtio_blk_dma_restart_bh(void *opaque) ++void virtio_blk_process_queued_requests(VirtIOBlock *s) + { +- VirtIOBlock *s = opaque; + VirtIOBlockReq *req = s->rq; + MultiReqBuffer mrb = {}; + +- qemu_bh_delete(s->bh); +- s->bh = NULL; +- + s->rq = NULL; + + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); +@@ -845,6 +841,16 @@ static void virtio_blk_dma_restart_bh(void *opaque) + aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + ++static void virtio_blk_dma_restart_bh(void *opaque) ++{ ++ VirtIOBlock *s = opaque; ++ ++ qemu_bh_delete(s->bh); ++ s->bh = NULL; ++ ++ virtio_blk_process_queued_requests(s); ++} ++ + static void virtio_blk_dma_restart_cb(void *opaque, int running, + RunState state) + { +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index cddcfbebe9..cf8eea2f58 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -84,5 +84,6 @@ typedef struct MultiReqBuffer { + } MultiReqBuffer; + + bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); ++void virtio_blk_process_queued_requests(VirtIOBlock *s); + + #endif +-- +2.27.0 + diff --git a/virtio-blk-delete-vqs-on-the-error-path-in-realize.patch b/virtio-blk-delete-vqs-on-the-error-path-in-realize.patch new file mode 100644 index 0000000000000000000000000000000000000000..205f663470d3aa594910bd19e2be8547d226e1a8 --- /dev/null +++ 
b/virtio-blk-delete-vqs-on-the-error-path-in-realize.patch @@ -0,0 +1,45 @@ +From ec8a25fec9898f46a6a94aa4f328fe02948b3d59 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Sat, 28 Mar 2020 08:57:04 +0800 +Subject: [PATCH 12/14] virtio-blk: delete vqs on the error path in realize() + +virtio_vqs forgot to free on the error path in realize(). Fix that. + +The asan stack: +Direct leak of 14336 byte(s) in 1 object(s) allocated from: + #0 0x7f58b93fd970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) + #1 0x7f58b858249d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) + #2 0x5562cc627f49 in virtio_add_queue /mnt/sdb/qemu/hw/virtio/virtio.c:2413 + #3 0x5562cc4b524a in virtio_blk_device_realize /mnt/sdb/qemu/hw/block/virtio-blk.c:1202 + #4 0x5562cc613050 in virtio_device_realize /mnt/sdb/qemu/hw/virtio/virtio.c:3615 + #5 0x5562ccb7a568 in device_set_realized /mnt/sdb/qemu/hw/core/qdev.c:891 + #6 0x5562cd39cd45 in property_set_bool /mnt/sdb/qemu/qom/object.c:2238 + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Reviewed-by: Stefano Garzarella +Message-Id: <20200328005705.29898-2-pannengyuan@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Peng Liang +--- + hw/block/virtio-blk.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index cbb3729158fe..703ed4c93bff 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1173,6 +1173,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); + if (err != NULL) { + error_propagate(errp, err); ++ for (i = 0; i < conf->num_queues; i++) { ++ virtio_del_queue(vdev, i); ++ } + virtio_cleanup(vdev); + return; + } +-- +2.26.2 + diff --git a/virtio-crypto-do-delete-ctrl_vq-in-virtio_crypto_dev.patch b/virtio-crypto-do-delete-ctrl_vq-in-virtio_crypto_dev.patch new file mode 100644 index 0000000000000000000000000000000000000000..aab3e86663021d2576763fff2a4f9c5985a2fb32 --- /dev/null +++ b/virtio-crypto-do-delete-ctrl_vq-in-virtio_crypto_dev.patch @@ -0,0 +1,55 @@ +From 62ded4fc6b38e2642ea4d95a93d70d0f608bee65 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Tue, 25 Feb 2020 15:55:54 +0800 +Subject: [PATCH 3/9] virtio-crypto: do delete ctrl_vq in + virtio_crypto_device_unrealize + +Similar to other virtio-devices, ctrl_vq was not deleted in virtio_crypto_device_unrealize; this patch fixes it. +This device already maintains vq pointers. Thus, we use the new virtio_delete_queue function directly to do the cleanup.
+ +The leak stack: +Direct leak of 10752 byte(s) in 3 object(s) allocated from: + #0 0x7f4c024b1970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970) + #1 0x7f4c018be49d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d) + #2 0x55a2f8017279 in virtio_add_queue /mnt/sdb/qemu-new/qemu_test/qemu/hw/virtio/virtio.c:2333 + #3 0x55a2f8057035 in virtio_crypto_device_realize /mnt/sdb/qemu-new/qemu_test/qemu/hw/virtio/virtio-crypto.c:814 + #4 0x55a2f8005d80 in virtio_device_realize /mnt/sdb/qemu-new/qemu_test/qemu/hw/virtio/virtio.c:3531 + #5 0x55a2f8497d1b in device_set_realized /mnt/sdb/qemu-new/qemu_test/qemu/hw/core/qdev.c:891 + #6 0x55a2f8b48595 in property_set_bool /mnt/sdb/qemu-new/qemu_test/qemu/qom/object.c:2238 + #7 0x55a2f8b54fad in object_property_set_qobject /mnt/sdb/qemu-new/qemu_test/qemu/qom/qom-qobject.c:26 + #8 0x55a2f8b4de2c in object_property_set_bool /mnt/sdb/qemu-new/qemu_test/qemu/qom/object.c:1390 + #9 0x55a2f80609c9 in virtio_crypto_pci_realize /mnt/sdb/qemu-new/qemu_test/qemu/hw/virtio/virtio-crypto-pci.c:58 + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Cc: "Gonglei (Arei)" +Message-Id: <20200225075554.10835-5-pannengyuan@huawei.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: AlexChen +--- + hw/virtio/virtio-crypto.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 45187d3..0076b4b 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -830,12 +830,13 @@ static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp) + + max_queues = vcrypto->multiqueue ? 
vcrypto->max_queues : 1; + for (i = 0; i < max_queues; i++) { +- virtio_del_queue(vdev, i); ++ virtio_delete_queue(vcrypto->vqs[i].dataq); + q = &vcrypto->vqs[i]; + qemu_bh_delete(q->dataq_bh); + } + + g_free(vcrypto->vqs); ++ virtio_delete_queue(vcrypto->ctrl_vq); + + virtio_cleanup(vdev); + cryptodev_backend_set_used(vcrypto->cryptodev, false); +-- +1.8.3.1 + diff --git a/virtio-don-t-enable-notifications-during-polling.patch b/virtio-don-t-enable-notifications-during-polling.patch new file mode 100644 index 0000000000000000000000000000000000000000..cb77429e9e2fa70688ed245f7cf440fb61cd38ad --- /dev/null +++ b/virtio-don-t-enable-notifications-during-polling.patch @@ -0,0 +1,146 @@ +From 0592b1e444e8ef7f00fb04a637dba72b732b70e4 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 9 Dec 2019 21:09:57 +0000 +Subject: [PATCH] virtio: don't enable notifications during polling + +Virtqueue notifications are not necessary during polling, so we disable +them. This allows the guest driver to avoid MMIO vmexits. +Unfortunately the virtio-blk and virtio-scsi handler functions re-enable +notifications, defeating this optimization. + +Fix virtio-blk and virtio-scsi emulation so they leave notifications +disabled. The key thing to remember for correctness is that polling +always checks one last time after ending its loop, therefore it's safe +to lose the race when re-enabling notifications at the end of polling. + +There is a measurable performance improvement of 5-10% with the null-co +block driver. Real-life storage configurations will see a smaller +improvement because the MMIO vmexit overhead contributes less to +latency. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20191209210957.65087-1-stefanha@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +--- + hw/block/virtio-blk.c | 9 +++++++-- + hw/scsi/virtio-scsi.c | 9 +++++++-- + hw/virtio/virtio.c | 12 ++++++------ + include/hw/virtio/virtio.h | 1 + + 4 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 2db9804cfe..fbe2ed6779 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -766,13 +766,16 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + { + VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; ++ bool suppress_notifications = virtio_queue_get_notification(vq); + bool progress = false; + + aio_context_acquire(blk_get_aio_context(s->blk)); + blk_io_plug(s->blk); + + do { +- virtio_queue_set_notification(vq, 0); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 0); ++ } + + while ((req = virtio_blk_get_request(s, vq))) { + progress = true; +@@ -783,7 +786,9 @@ bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + } + } + +- virtio_queue_set_notification(vq, 1); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 1); ++ } + } while (!virtio_queue_empty(vq)); + + if (mrb.num_reqs) { +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 8b9e5e2b49..eddb13e7c6 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -594,12 +594,15 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req, *next; + int ret = 0; ++ bool suppress_notifications = virtio_queue_get_notification(vq); + bool progress = false; + + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + + do { +- virtio_queue_set_notification(vq, 0); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 0); ++ } + + while ((req = virtio_scsi_pop_req(s, vq))) { + progress = true; +@@ -619,7 +622,9 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + } + } + +- virtio_queue_set_notification(vq, 1); ++ if (suppress_notifications) { ++ virtio_queue_set_notification(vq, 
1); ++ } + } while (ret != -EINVAL && !virtio_queue_empty(vq)); + + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 90971f4afa..daa8250332 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -390,6 +390,11 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) + rcu_read_unlock(); + } + ++bool virtio_queue_get_notification(VirtQueue *vq) ++{ ++ return vq->notification; ++} ++ + int virtio_queue_ready(VirtQueue *vq) + { + return vq->vring.avail != 0; +@@ -2572,17 +2577,12 @@ static bool virtio_queue_host_notifier_aio_poll(void *opaque) + { + EventNotifier *n = opaque; + VirtQueue *vq = container_of(n, VirtQueue, host_notifier); +- bool progress; + + if (!vq->vring.desc || virtio_queue_empty(vq)) { + return false; + } + +- progress = virtio_queue_notify_aio_vq(vq); +- +- /* In case the handler function re-enabled notifications */ +- virtio_queue_set_notification(vq, 0); +- return progress; ++ return virtio_queue_notify_aio_vq(vq); + } + + static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index ca2fbaeb35..7394715407 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -229,6 +229,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); + + void virtio_notify_config(VirtIODevice *vdev); + ++bool virtio_queue_get_notification(VirtQueue *vq); + void virtio_queue_set_notification(VirtQueue *vq, int enable); + + int virtio_queue_ready(VirtQueue *vq); +-- +2.27.0 + diff --git a/virtio-gracefully-handle-invalid-region-caches.patch b/virtio-gracefully-handle-invalid-region-caches.patch new file mode 100644 index 0000000000000000000000000000000000000000..2793f21b0d988625b4f53624c471c403937fcedc --- /dev/null +++ b/virtio-gracefully-handle-invalid-region-caches.patch @@ -0,0 +1,238 @@ +From 63a3c25baa9c7372b80df80be4447552af6d6ba0 Mon Sep 17 00:00:00 2001 +From: Stefan 
Hajnoczi +Date: Fri, 7 Feb 2020 10:46:19 +0000 +Subject: [PATCH 7/9] virtio: gracefully handle invalid region caches + +The virtqueue code sets up MemoryRegionCaches to access the virtqueue +guest RAM data structures. The code currently assumes that +VRingMemoryRegionCaches is initialized before device emulation code +accesses the virtqueue. An assertion will fail in +vring_get_region_caches() when this is not true. Device fuzzing found a +case where this assumption is false (see below). + +Virtqueue guest RAM addresses can also be changed from a vCPU thread +while an IOThread is accessing the virtqueue. This breaks the same +assumption but this time the caches could become invalid partway through +the virtqueue code. The code fetches the caches RCU pointer multiple +times so we will need to validate the pointer every time it is fetched. + +Add checks each time we call vring_get_region_caches() and treat invalid +caches as a nop: memory stores are ignored and memory reads return 0. + +The fuzz test failure is as follows: + + $ qemu -M pc -device virtio-blk-pci,id=drv0,drive=drive0,addr=4.0 \ + -drive if=none,id=drive0,file=null-co://,format=raw,auto-read-only=off \ + -drive if=none,id=drive1,file=null-co://,file.read-zeroes=on,format=raw \ + -display none \ + -qtest stdio + endianness + outl 0xcf8 0x80002020 + outl 0xcfc 0xe0000000 + outl 0xcf8 0x80002004 + outw 0xcfc 0x7 + write 0xe0000000 0x24 0x00ffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab5cffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffabffffffab0000000001 + inb 0x4 + writew 0xe000001c 0x1 + write 0xe0000014 0x1 0x0d + +The following error message is produced: + + qemu-system-x86_64: /home/stefanha/qemu/hw/virtio/virtio.c:286: vring_get_region_caches: Assertion `caches != NULL' failed. 
+ +The backtrace looks like this: + + #0 0x00007ffff5520625 in raise () at /lib64/libc.so.6 + #1 0x00007ffff55098d9 in abort () at /lib64/libc.so.6 + #2 0x00007ffff55097a9 in _nl_load_domain.cold () at /lib64/libc.so.6 + #3 0x00007ffff5518a66 in annobin_assert.c_end () at /lib64/libc.so.6 + #4 0x00005555559073da in vring_get_region_caches (vq=) at qemu/hw/virtio/virtio.c:286 + #5 vring_get_region_caches (vq=) at qemu/hw/virtio/virtio.c:283 + #6 0x000055555590818d in vring_used_flags_set_bit (mask=1, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398 + #7 virtio_queue_split_set_notification (enable=0, vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:398 + #8 virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:451 + #9 0x0000555555908512 in virtio_queue_set_notification (vq=vq@entry=0x5555575ceea0, enable=enable@entry=0) at qemu/hw/virtio/virtio.c:444 + #10 0x00005555558c697a in virtio_blk_handle_vq (s=0x5555575c57e0, vq=0x5555575ceea0) at qemu/hw/block/virtio-blk.c:775 + #11 0x0000555555907836 in virtio_queue_notify_aio_vq (vq=0x5555575ceea0) at qemu/hw/virtio/virtio.c:2244 + #12 0x0000555555cb5dd7 in aio_dispatch_handlers (ctx=ctx@entry=0x55555671a420) at util/aio-posix.c:429 + #13 0x0000555555cb67a8 in aio_dispatch (ctx=0x55555671a420) at util/aio-posix.c:460 + #14 0x0000555555cb307e in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:260 + #15 0x00007ffff7bbc510 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 + #16 0x0000555555cb5848 in glib_pollfds_poll () at util/main-loop.c:219 + #17 os_host_main_loop_wait (timeout=) at util/main-loop.c:242 + #18 main_loop_wait (nonblocking=) at util/main-loop.c:518 + #19 0x00005555559b20c9 in main_loop () at vl.c:1683 + #20 0x0000555555838115 in main (argc=, argv=, envp=) at vl.c:4441 + +Reported-by: Alexander Bulekov +Cc: Michael Tsirkin +Cc: Cornelia Huck +Cc: Paolo Bonzini +Cc: qemu-stable@nongnu.org +Signed-off-by: Stefan Hajnoczi +Message-Id: 
<20200207104619.164892-1-stefanha@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: AlexChen +--- + hw/virtio/virtio.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 59 insertions(+), 7 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3d027d3..90971f4 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -221,15 +221,19 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, + + static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) + { +- VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches); +- assert(caches != NULL); +- return caches; ++ return atomic_rcu_read(&vq->vring.caches); + } ++ + /* Called within rcu_read_lock(). */ + static inline uint16_t vring_avail_flags(VirtQueue *vq) + { + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + hwaddr pa = offsetof(VRingAvail, flags); ++ ++ if (!caches) { ++ return 0; ++ } ++ + return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); + } + +@@ -238,6 +242,11 @@ static inline uint16_t vring_avail_idx(VirtQueue *vq) + { + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + hwaddr pa = offsetof(VRingAvail, idx); ++ ++ if (!caches) { ++ return 0; ++ } ++ + vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); + return vq->shadow_avail_idx; + } +@@ -247,6 +256,11 @@ static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) + { + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + hwaddr pa = offsetof(VRingAvail, ring[i]); ++ ++ if (!caches) { ++ return 0; ++ } ++ + return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); + } + +@@ -262,6 +276,11 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, + { + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + hwaddr pa = offsetof(VRingUsed, ring[i]); ++ ++ if (!caches) { ++ return; ++ } ++ + 
virtio_tswap32s(vq->vdev, &uelem->id); + virtio_tswap32s(vq->vdev, &uelem->len); + address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem)); +@@ -273,6 +292,11 @@ static uint16_t vring_used_idx(VirtQueue *vq) + { + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + hwaddr pa = offsetof(VRingUsed, idx); ++ ++ if (!caches) { ++ return 0; ++ } ++ + return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); + } + +@@ -281,8 +305,12 @@ static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) + { + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + hwaddr pa = offsetof(VRingUsed, idx); +- virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); +- address_space_cache_invalidate(&caches->used, pa, sizeof(val)); ++ ++ if (caches) { ++ virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); ++ address_space_cache_invalidate(&caches->used, pa, sizeof(val)); ++ } ++ + vq->used_idx = val; + } + +@@ -292,8 +320,13 @@ static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask) + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + VirtIODevice *vdev = vq->vdev; + hwaddr pa = offsetof(VRingUsed, flags); +- uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); ++ uint16_t flags; ++ ++ if (!caches) { ++ return; ++ } + ++ flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); + virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask); + address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); + } +@@ -304,8 +337,13 @@ static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask) + VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); + VirtIODevice *vdev = vq->vdev; + hwaddr pa = offsetof(VRingUsed, flags); +- uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); ++ uint16_t flags; + ++ if (!caches) { ++ return; ++ } ++ ++ flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa); + virtio_stw_phys_cached(vdev, &caches->used, 
pa, flags & ~mask); + address_space_cache_invalidate(&caches->used, pa, sizeof(flags)); + } +@@ -320,6 +358,10 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) + } + + caches = vring_get_region_caches(vq); ++ if (!caches) { ++ return; ++ } ++ + pa = offsetof(VRingUsed, ring[vq->vring.num]); + virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val); + address_space_cache_invalidate(&caches->used, pa, sizeof(val)); +@@ -626,6 +668,11 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + + max = vq->vring.num; + caches = vring_get_region_caches(vq); ++ if (!caches) { ++ virtio_error(vdev, "Region cached not initialized"); ++ goto err; ++ } ++ + if (caches->desc.len < max * sizeof(VRingDesc)) { + virtio_error(vdev, "Cannot map descriptor ring"); + goto err; +@@ -894,6 +941,11 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) + i = head; + + caches = vring_get_region_caches(vq); ++ if (!caches) { ++ virtio_error(vdev, "Region caches not initialized"); ++ goto done; ++ } ++ + if (caches->desc.len < max * sizeof(VRingDesc)) { + virtio_error(vdev, "Cannot map descriptor ring"); + goto done; +-- +1.8.3.1 + diff --git a/virtio-input-fix-memory-leak-on-unrealize.patch b/virtio-input-fix-memory-leak-on-unrealize.patch new file mode 100644 index 0000000000000000000000000000000000000000..df83453f04525eb4b9fa29ba3d03dc6fa8b31fc5 --- /dev/null +++ b/virtio-input-fix-memory-leak-on-unrealize.patch @@ -0,0 +1,45 @@ +From e29f08036ff11bf220463b4327b315505e760a44 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 21 Nov 2019 13:56:49 +0400 +Subject: [PATCH 9/9] virtio-input: fix memory leak on unrealize +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Spotted by ASAN + minor stylistic change. + +Signed-off-by: Marc-André Lureau +Reviewed-by: Michael S. Tsirkin +Message-Id: <20191121095649.25453-1-marcandre.lureau@redhat.com> +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: Marc-André Lureau +Reviewed-by: Michael S. Tsirkin +Signed-off-by: AlexChen +--- + hw/input/virtio-input.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c +index 9946394..401c1de 100644 +--- a/hw/input/virtio-input.c ++++ b/hw/input/virtio-input.c +@@ -275,6 +275,7 @@ static void virtio_input_finalize(Object *obj) + + g_free(vinput->queue); + } ++ + static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) + { + VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(dev); +@@ -288,6 +289,8 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) + return; + } + } ++ virtio_del_queue(vdev, 0); ++ virtio_del_queue(vdev, 1); + virtio_cleanup(vdev); + } + +-- +1.8.3.1 + diff --git a/virtio-net-delete-also-control-queue-when-TX-RX-dele.patch b/virtio-net-delete-also-control-queue-when-TX-RX-dele.patch new file mode 100644 index 0000000000000000000000000000000000000000..f955fbb1a7f6b0ae8e646a366d3c6401cf699788 --- /dev/null +++ b/virtio-net-delete-also-control-queue-when-TX-RX-dele.patch @@ -0,0 +1,49 @@ +From 358e2bfe2e1a65b1e926163d7d1ffaefd601d874 Mon Sep 17 00:00:00 2001 +From: Julia Suvorova +Date: Wed, 19 Feb 2020 21:34:31 +0000 +Subject: [PATCH] virtio-net: delete also control queue when TX/RX deleted + +RH-Author: Julia Suvorova +Message-id: <20200219213431.11913-5-jusual@redhat.com> +Patchwork-id: 93983 +O-Subject: [RHEL-AV-8.2.0 qemu-kvm PATCH 4/4] virtio-net: delete also control queue when TX/RX deleted +Bugzilla: 1791590 +RH-Acked-by: Danilo de Paula +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Michael S. Tsirkin + +From: Yuri Benditovich + +https://bugzilla.redhat.com/show_bug.cgi?id=1708480 +If the control queue is not deleted together with TX/RX, it +later will be ignored in freeing cache resources and hot +unplug will not be completed. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Yuri Benditovich +Message-Id: <20191226043649.14481-3-yuri.benditovich@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d945d9f1731244ef341f74ede93120fc9de35913) +Signed-off-by: Danilo C. L. de Paula +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 6adb0fe252..63f1bae99c 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2803,7 +2803,8 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp) + for (i = 0; i < max_queues; i++) { + virtio_net_del_queue(n, i); + } +- ++ /* delete also control vq */ ++ virtio_del_queue(vdev, max_queues * 2); + qemu_announce_timer_del(&n->announce_timer, false); + g_free(n->vqs); + qemu_del_nic(n->nic); +-- +2.27.0 + diff --git a/virtio-net-fix-use-after-unmap-free-for-sg.patch b/virtio-net-fix-use-after-unmap-free-for-sg.patch new file mode 100644 index 0000000000000000000000000000000000000000..ee1fa0695da1d960164f576fb1688dad0bacbd01 --- /dev/null +++ b/virtio-net-fix-use-after-unmap-free-for-sg.patch @@ -0,0 +1,123 @@ +From 662633f388c5cead35b6ba5428dc9ab88710a471 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Thu, 2 Sep 2021 13:44:12 +0800 +Subject: [PATCH] virtio-net: fix use after unmap/free for sg + +When mergeable buffer is enabled, we try to set the num_buffers after +the virtqueue elem has been unmapped. This will lead to several issues, +e.g. a use after free when the descriptor has an address which belongs +to the non direct access region. In this case we use bounce buffer +that is allocated during address_space_map() and freed during +address_space_unmap(). + +Fix this by storing the elems temporarily in an array and delaying the +unmap until after we set the num_buffers. + +This addresses CVE-2021-3748. 
+ +Reported-by: Alexander Bulekov +Fixes: fbe78f4f55c6 ("virtio-net support") +Cc: qemu-stable@nongnu.org +Signed-off-by: Jason Wang +Signed-off-by: imxcc +--- + hw/net/virtio-net.c | 39 ++++++++++++++++++++++++++++++++------- + 1 file changed, 32 insertions(+), 7 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 63f1bae99c..f93823d06d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1265,10 +1265,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + VirtIONet *n = qemu_get_nic_opaque(nc); + VirtIONetQueue *q = virtio_net_get_subqueue(nc); + VirtIODevice *vdev = VIRTIO_DEVICE(n); ++ VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; ++ size_t lens[VIRTQUEUE_MAX_SIZE]; + struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; + struct virtio_net_hdr_mrg_rxbuf mhdr; + unsigned mhdr_cnt = 0; +- size_t offset, i, guest_offset; ++ size_t offset, i, guest_offset, j; ++ ssize_t err; + + if (!virtio_net_can_receive(nc)) { + return -1; +@@ -1291,6 +1294,12 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + + total = 0; + ++ if (i == VIRTQUEUE_MAX_SIZE) { ++ virtio_error(vdev, "virtio-net unexpected long buffer chain"); ++ err = size; ++ goto err; ++ } ++ + elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); + if (!elem) { + if (i) { +@@ -1302,7 +1311,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + n->guest_hdr_len, n->host_hdr_len, + vdev->guest_features); + } +- return -1; ++ err = -1; ++ goto err; + } + + if (elem->in_num < 1) { +@@ -1310,7 +1320,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + "virtio-net receive queue contains no in buffers"); + virtqueue_detach_element(q->rx_vq, elem, 0); + g_free(elem); +- return -1; ++ err = -1; ++ goto err; + } + + sg = elem->in_sg; +@@ -1342,12 +1353,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + if (!n->mergeable_rx_bufs && 
offset < size) { + virtqueue_unpop(q->rx_vq, elem, total); + g_free(elem); +- return size; ++ err = size; ++ goto err; + } + +- /* signal other side */ +- virtqueue_fill(q->rx_vq, elem, total, i++); +- g_free(elem); ++ elems[i] = elem; ++ lens[i] = total; ++ i++; + } + + if (mhdr_cnt) { +@@ -1357,10 +1369,23 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + &mhdr.num_buffers, sizeof mhdr.num_buffers); + } + ++ for (j = 0; j < i; j++) { ++ /* signal other side */ ++ virtqueue_fill(q->rx_vq, elems[j], lens[j], j); ++ g_free(elems[j]); ++ } ++ + virtqueue_flush(q->rx_vq, i); + virtio_notify(vdev, q->rx_vq); + + return size; ++ ++err: ++ for (j = 0; j < i; j++) { ++ g_free(elems[j]); ++ } ++ ++ return err; + } + + static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, +-- +2.27.0 + diff --git a/virtio-net-prevent-offloads-reset-on-migration.patch b/virtio-net-prevent-offloads-reset-on-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..ab8fbe26115279359c6a3928e93bf134ca88a2cb --- /dev/null +++ b/virtio-net-prevent-offloads-reset-on-migration.patch @@ -0,0 +1,122 @@ +From 4887acf574a573137660aa98d9d422ece0a41a5a Mon Sep 17 00:00:00 2001 +From: Mikhail Sennikovsky +Date: Fri, 11 Oct 2019 15:58:04 +0200 +Subject: [PATCH] virtio-net: prevent offloads reset on migration + +Currently offloads disabled by guest via the VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET +command are not preserved on VM migration. +Instead all offloads reported by guest features (via VIRTIO_PCI_GUEST_FEATURES) +get enabled. 
+What happens is: first the VirtIONet::curr_guest_offloads gets restored and offloads +are getting set correctly: + + #0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=0, tso6=0, ecn=0, ufo=0) at net/net.c:474 + #1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720 + #2 virtio_net_post_load_device (opaque=0x555557701ca0, version_id=11) at hw/net/virtio-net.c:2334 + #3 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577c80 , opaque=0x555557701ca0, version_id=11) + at migration/vmstate.c:168 + #4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2197 + #5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036 + #6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 , opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143 + #7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829 + #8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211 + #9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395 + #10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467 + #11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449 + +However later on the features are getting restored, and offloads get reset to +everything supported by features: + + #0 qemu_set_offload (nc=0x555556a11400, csum=1, tso4=1, tso6=1, ecn=0, ufo=0) at net/net.c:474 + #1 virtio_net_apply_guest_offloads (n=0x555557701ca0) at hw/net/virtio-net.c:720 + #2 virtio_net_set_features (vdev=0x555557701ca0, features=5104441767) at hw/net/virtio-net.c:773 + #3 virtio_set_features_nocheck (vdev=0x555557701ca0, val=5104441767) at hw/virtio/virtio.c:2052 + #4 virtio_load (vdev=0x555557701ca0, f=0x5555569dc010, version_id=11) at hw/virtio/virtio.c:2220 + #5 virtio_device_get (f=0x5555569dc010, opaque=0x555557701ca0, size=0, 
field=0x55555668cd00 <__compound_literal.5>) at hw/virtio/virtio.c:2036 + #6 vmstate_load_state (f=0x5555569dc010, vmsd=0x555556577ce0 , opaque=0x555557701ca0, version_id=11) at migration/vmstate.c:143 + #7 vmstate_load (f=0x5555569dc010, se=0x5555578189e0) at migration/savevm.c:829 + #8 qemu_loadvm_section_start_full (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2211 + #9 qemu_loadvm_state_main (f=0x5555569dc010, mis=0x5555569eee20) at migration/savevm.c:2395 + #10 qemu_loadvm_state (f=0x5555569dc010) at migration/savevm.c:2467 + #11 process_incoming_migration_co (opaque=0x0) at migration/migration.c:449 + +Fix this by preserving the state in saved_guest_offloads field and +pushing out offload initialization to the new post load hook. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Mikhail Sennikovsky +Signed-off-by: Jason Wang +(cherry picked from commit 7788c3f2e21e35902d45809b236791383bbb613e) +Signed-off-by: Michael Roth +--- + hw/net/virtio-net.c | 27 ++++++++++++++++++++++++--- + include/hw/virtio/virtio-net.h | 2 ++ + 2 files changed, 26 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index b9e1cd71cf..6adb0fe252 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2330,9 +2330,13 @@ static int virtio_net_post_load_device(void *opaque, int version_id) + n->curr_guest_offloads = virtio_net_supported_guest_offloads(n); + } + +- if (peer_has_vnet_hdr(n)) { +- virtio_net_apply_guest_offloads(n); +- } ++ /* ++ * curr_guest_offloads will be later overwritten by the ++ * virtio_set_features_nocheck call done from the virtio_load. ++ * Here we make sure it is preserved and restored accordingly ++ * in the virtio_net_post_load_virtio callback. 
++ */ ++ n->saved_guest_offloads = n->curr_guest_offloads; + + virtio_net_set_queues(n); + +@@ -2367,6 +2371,22 @@ static int virtio_net_post_load_device(void *opaque, int version_id) + return 0; + } + ++static int virtio_net_post_load_virtio(VirtIODevice *vdev) ++{ ++ VirtIONet *n = VIRTIO_NET(vdev); ++ /* ++ * The actual needed state is now in saved_guest_offloads, ++ * see virtio_net_post_load_device for detail. ++ * Restore it back and apply the desired offloads. ++ */ ++ n->curr_guest_offloads = n->saved_guest_offloads; ++ if (peer_has_vnet_hdr(n)) { ++ virtio_net_apply_guest_offloads(n); ++ } ++ ++ return 0; ++} ++ + /* tx_waiting field of a VirtIONetQueue */ + static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = { + .name = "virtio-net-queue-tx_waiting", +@@ -2909,6 +2929,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) + vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; + vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; + vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); ++ vdc->post_load = virtio_net_post_load_virtio; + vdc->vmsd = &vmstate_virtio_net_device; + } + +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index b96f0c643f..07a9319f4b 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -182,6 +182,8 @@ struct VirtIONet { + char *netclient_name; + char *netclient_type; + uint64_t curr_guest_offloads; ++ /* used on saved state restore phase to preserve the curr_guest_offloads */ ++ uint64_t saved_guest_offloads; + AnnounceTimer announce_timer; + bool needs_vnet_hdr_swap; + bool mtu_bypass_backend; +-- +2.23.0 diff --git a/virtio-new-post_load-hook.patch b/virtio-new-post_load-hook.patch new file mode 100644 index 0000000000000000000000000000000000000000..974f286c6730c66cc3cb0a64b046bea341dd262b --- /dev/null +++ b/virtio-new-post_load-hook.patch @@ -0,0 +1,63 @@ +From 8010d3fce008dd13f155bc0babfe236ea44a2712 Mon Sep 17 
00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Fri, 11 Oct 2019 15:58:03 +0200 +Subject: [PATCH] virtio: new post_load hook + +Post load hook in virtio vmsd is called early while device is processed, +and when VirtIODevice core isn't fully initialized. Most device +specific code isn't ready to deal with a device in such state, and +behaves weirdly. + +Add a new post_load hook in a device class instead. Devices should use +this unless they specifically want to verify the migration stream as +it's processed, e.g. for bounds checking. + +Cc: qemu-stable@nongnu.org +Suggested-by: "Dr. David Alan Gilbert" +Cc: Mikhail Sennikovsky +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Jason Wang +(cherry picked from commit 1dd713837cac8ec5a97d3b8492d72ce5ac94803c) +Signed-off-by: Michael Roth +--- + hw/virtio/virtio.c | 7 +++++++ + include/hw/virtio/virtio.h | 6 ++++++ + 2 files changed, 13 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index a94ea18a9c..7c3822c3a0 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2287,6 +2287,13 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) + } + rcu_read_unlock(); + ++ if (vdc->post_load) { ++ ret = vdc->post_load(vdev); ++ if (ret) { ++ return ret; ++ } ++ } ++ + return 0; + } + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index b189788cb2..f9f62370e9 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -158,6 +158,12 @@ typedef struct VirtioDeviceClass { + */ + void (*save)(VirtIODevice *vdev, QEMUFile *f); + int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id); ++ /* Post load hook in vmsd is called early while device is processed, and ++ * when VirtIODevice isn't fully initialized. Devices should use this instead, ++ * unless they specifically want to verify the migration stream as it's ++ * processed, e.g. for bounds checking. 
++ */ ++ int (*post_load)(VirtIODevice *vdev); + const VMStateDescription *vmsd; + } VirtioDeviceClass; + +-- +2.23.0 diff --git a/virtio-pci-fix-queue_enable-write.patch b/virtio-pci-fix-queue_enable-write.patch new file mode 100644 index 0000000000000000000000000000000000000000..481b41bbf11f4ebb94ae8fd746b13ad4ac41555d --- /dev/null +++ b/virtio-pci-fix-queue_enable-write.patch @@ -0,0 +1,58 @@ +From aebd6a1512e03ba51f6824fcdbaa09f67e9ff5e2 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Wed, 10 Jun 2020 13:43:51 +0800 +Subject: [PATCH 11/11] virtio-pci: fix queue_enable write + +The spec says: The driver uses this to selectively prevent the device from +executing requests from this virtqueue. 1 - enabled; 0 - disabled. + +Though writing 0 to queue_enable is forbidden by the spec, we should not +assume that the value is 1. + +Fix this by ignoring any written value other than 1. + +Signed-off-by: Jason Wang +Message-Id: <20200610054351.15811-1-jasowang@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: BiaoXiang Ye +--- + hw/virtio/virtio-pci.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index b4b0ed26..4b8845a6 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1259,16 +1259,20 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, + virtio_queue_set_vector(vdev, vdev->queue_sel, val); + break; + case VIRTIO_PCI_COMMON_Q_ENABLE: +- virtio_queue_set_num(vdev, vdev->queue_sel, +- proxy->vqs[vdev->queue_sel].num); +- virtio_queue_set_rings(vdev, vdev->queue_sel, ++ if (val == 1) { ++ virtio_queue_set_num(vdev, vdev->queue_sel, ++ proxy->vqs[vdev->queue_sel].num); ++ virtio_queue_set_rings(vdev, vdev->queue_sel, + ((uint64_t)proxy->vqs[vdev->queue_sel].desc[1]) << 32 | + proxy->vqs[vdev->queue_sel].desc[0], + ((uint64_t)proxy->vqs[vdev->queue_sel].avail[1]) << 32 | + proxy->vqs[vdev->queue_sel].avail[0], + ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | + proxy->vqs[vdev->queue_sel].used[0]); +- proxy->vqs[vdev->queue_sel].enabled = 1; ++ proxy->vqs[vdev->queue_sel].enabled = 1; ++ } else { ++ virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val); ++ } + break; + case VIRTIO_PCI_COMMON_Q_DESCLO: + proxy->vqs[vdev->queue_sel].desc[0] = val; +-- +2.27.0.dirty + diff --git a/virtio-pmem-do-delete-rq_vq-in-virtio_pmem_unrealize.patch b/virtio-pmem-do-delete-rq_vq-in-virtio_pmem_unrealize.patch new file mode 100644 index 0000000000000000000000000000000000000000..d8ed58faa8f5c6517d131ced73209bc41122158e --- /dev/null +++ b/virtio-pmem-do-delete-rq_vq-in-virtio_pmem_unrealize.patch @@ -0,0 +1,39 @@ +From 637606d18c7208e21d8ab4f318cccde64ae58c76 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Tue, 25 Feb 2020 15:55:53 +0800 +Subject: [PATCH 2/9] virtio-pmem: do delete rq_vq in virtio_pmem_unrealize +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Similar to other 
virtio-devices, rq_vq is never deleted in +virtio_pmem_unrealize; this patch fixes it. This device already +maintains a vq pointer, thus we use the new virtio_delete_queue +function directly to do the cleanup. + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Message-Id: <20200225075554.10835-4-pannengyuan@huawei.com> +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: AlexChen +--- + hw/virtio/virtio-pmem.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c +index 17c196d..c680b0a 100644 +--- a/hw/virtio/virtio-pmem.c ++++ b/hw/virtio/virtio-pmem.c +@@ -127,6 +127,7 @@ static void virtio_pmem_unrealize(DeviceState *dev, Error **errp) + VirtIOPMEM *pmem = VIRTIO_PMEM(dev); + + host_memory_backend_set_mapped(pmem->memdev, false); ++ virtio_delete_queue(pmem->rq_vq); + virtio_cleanup(vdev); + } + +-- +1.8.3.1 + diff --git a/virtio-serial-bus-Plug-memory-leak-on-realize-error-.patch b/virtio-serial-bus-Plug-memory-leak-on-realize-error-.patch new file mode 100644 index 0000000000000000000000000000000000000000..02069901b096cd09b0f30dbef9d55e3fe6dc920d --- /dev/null +++ b/virtio-serial-bus-Plug-memory-leak-on-realize-error-.patch @@ -0,0 +1,65 @@ +From 0d93f5455489274201b1054d987b12f8e8a6206e Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Mon, 9 Mar 2020 10:17:38 +0800 +Subject: [PATCH 11/14] virtio-serial-bus: Plug memory leak on realize() error + paths + +We neglect to free port->bh on the error paths. Fix that. 
+Reproducer: + {'execute': 'device_add', 'arguments': {'id': 'virtio_serial_pci0', 'driver': 'virtio-serial-pci', 'bus': 'pci.0', 'addr': '0x5'}, 'id': 'yVkZcGgV'} + {'execute': 'device_add', 'arguments': {'id': 'port1', 'driver': 'virtserialport', 'name': 'port1', 'chardev': 'channel1', 'bus': 'virtio_serial_pci0.0', 'nr': 1}, 'id': '3dXdUgJA'} + {'execute': 'device_add', 'arguments': {'id': 'port2', 'driver': 'virtserialport', 'name': 'port2', 'chardev': 'channel2', 'bus': 'virtio_serial_pci0.0', 'nr': 1}, 'id': 'qLzcCkob'} + {'execute': 'device_add', 'arguments': {'id': 'port2', 'driver': 'virtserialport', 'name': 'port2', 'chardev': 'channel2', 'bus': 'virtio_serial_pci0.0', 'nr': 2}, 'id': 'qLzcCkob'} + +The leak stack: +Direct leak of 40 byte(s) in 1 object(s) allocated from: + #0 0x7f04a8008ae8 in __interceptor_malloc (/lib64/libasan.so.5+0xefae8) + #1 0x7f04a73cf1d5 in g_malloc (/lib64/libglib-2.0.so.0+0x531d5) + #2 0x56273eaee484 in aio_bh_new /mnt/sdb/backup/qemu/util/async.c:125 + #3 0x56273eafe9a8 in qemu_bh_new /mnt/sdb/backup/qemu/util/main-loop.c:532 + #4 0x56273d52e62e in virtser_port_device_realize /mnt/sdb/backup/qemu/hw/char/virtio-serial-bus.c:946 + #5 0x56273dcc5040 in device_set_realized /mnt/sdb/backup/qemu/hw/core/qdev.c:891 + #6 0x56273e5ebbce in property_set_bool /mnt/sdb/backup/qemu/qom/object.c:2238 + #7 0x56273e5e5a9c in object_property_set /mnt/sdb/backup/qemu/qom/object.c:1324 + #8 0x56273e5ef5f8 in object_property_set_qobject /mnt/sdb/backup/qemu/qom/qom-qobject.c:26 + #9 0x56273e5e5e6a in object_property_set_bool /mnt/sdb/backup/qemu/qom/object.c:1390 + #10 0x56273daa40de in qdev_device_add /mnt/sdb/backup/qemu/qdev-monitor.c:680 + #11 0x56273daa53e9 in qmp_device_add /mnt/sdb/backup/qemu/qdev-monitor.c:805 + +Fixes: 199646d81522509ac2dba6d28c31e8c7d807bc93 +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Reviewed-by: Markus Armbruster +Reviewed-by: Amit Shah +Message-Id: <20200309021738.30072-1-pannengyuan@huawei.com> 
+Reviewed-by: Laurent Vivier +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Peng Liang +--- + hw/char/virtio-serial-bus.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index f7a54f261b21..2d23dae6d2b7 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -940,7 +940,6 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) + Error *err = NULL; + + port->vser = bus->vser; +- port->bh = qemu_bh_new(flush_queued_data_bh, port); + + assert(vsc->have_data); + +@@ -989,6 +988,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) + return; + } + ++ port->bh = qemu_bh_new(flush_queued_data_bh, port); + port->elem = NULL; + } + +-- +2.26.2 + diff --git a/virtio_blk-Add-support-for-retry-on-errors.patch b/virtio_blk-Add-support-for-retry-on-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..e7d8efd7ed99ae35f5309bee2e90bedfcc1f5e86 --- /dev/null +++ b/virtio_blk-Add-support-for-retry-on-errors.patch @@ -0,0 +1,89 @@ +From f3158cc327d435939d87ecee23485d082ebf3ba2 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:53 +0800 +Subject: [PATCH] virtio_blk: Add support for retry on errors + +Insert failed requests into device's list for later retry and handle +queued requests to implement retry_request_cb. 
+ +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + hw/block/virtio-blk.c | 21 ++++++++++++++++++--- + 1 file changed, 18 insertions(+), 3 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index ddf525b9d7..2db9804cfe 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -101,6 +101,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, + block_acct_failed(blk_get_stats(s->blk), &req->acct); + } + virtio_blk_free_request(req); ++ } else if (action == BLOCK_ERROR_ACTION_RETRY) { ++ req->mr_next = NULL; ++ req->next = s->rq; ++ s->rq = req; + } + + blk_error_action(s->blk, action, is_read, error); +@@ -142,6 +146,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); +@@ -161,6 +166,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); +@@ -183,6 +189,7 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + if (is_write_zeroes) { + block_acct_done(blk_get_stats(s->blk), &req->acct); +@@ -811,12 +818,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + + void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) + { +- VirtIOBlockReq *req = s->rq; ++ VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; + +- s->rq = NULL; +- + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); ++ req = s->rq; ++ s->rq = NULL; + while (req) { + VirtIOBlockReq *next = req->next; + if (virtio_blk_handle_request(req, &mrb)) { +@@ -1101,8 +1108,16 @@ static void 
virtio_blk_resize(void *opaque) + virtio_notify_config(vdev); + } + ++static void virtio_blk_retry_request(void *opaque) ++{ ++ VirtIOBlock *s = VIRTIO_BLK(opaque); ++ ++ virtio_blk_process_queued_requests(s, false); ++} ++ + static const BlockDevOps virtio_block_ops = { + .resize_cb = virtio_blk_resize, ++ .retry_request_cb = virtio_blk_retry_request, + }; + + static void virtio_blk_device_realize(DeviceState *dev, Error **errp) +-- +2.27.0 + diff --git a/vl-Don-t-mismatch-g_strsplit-g_free.patch b/vl-Don-t-mismatch-g_strsplit-g_free.patch new file mode 100644 index 0000000000000000000000000000000000000000..dc1f4cc484e8b27af68fa7c533ce338fdfbcf7ad --- /dev/null +++ b/vl-Don-t-mismatch-g_strsplit-g_free.patch @@ -0,0 +1,56 @@ +From cad4a99e8cab2fe581fb2c6c1421f5547b451e96 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Fri, 10 Jan 2020 17:17:09 +0800 +Subject: [PATCH] vl: Don't mismatch g_strsplit()/g_free() + +It's a mismatch between g_strsplit and g_free, it will cause a memory leak as follow: + +[root@localhost]# ./aarch64-softmmu/qemu-system-aarch64 -accel help +Accelerators supported in QEMU binary: +tcg +kvm +================================================================= +==1207900==ERROR: LeakSanitizer: detected memory leaks + +Direct leak of 8 byte(s) in 2 object(s) allocated from: + #0 0xfffd700231cb in __interceptor_malloc (/lib64/libasan.so.4+0xd31cb) + #1 0xfffd6ec57163 in g_malloc (/lib64/libglib-2.0.so.0+0x57163) + #2 0xfffd6ec724d7 in g_strndup (/lib64/libglib-2.0.so.0+0x724d7) + #3 0xfffd6ec73d3f in g_strsplit (/lib64/libglib-2.0.so.0+0x73d3f) + #4 0xaaab66be5077 in main /mnt/sdc/qemu-master/qemu-4.2.0-rc0/vl.c:3517 + #5 0xfffd6e140b9f in __libc_start_main (/lib64/libc.so.6+0x20b9f) + #6 0xaaab66bf0f53 (./build/aarch64-softmmu/qemu-system-aarch64+0x8a0f53) + +Direct leak of 2 byte(s) in 2 object(s) allocated from: + #0 0xfffd700231cb in __interceptor_malloc (/lib64/libasan.so.4+0xd31cb) + #1 0xfffd6ec57163 in g_malloc 
(/lib64/libglib-2.0.so.0+0x57163) + #2 0xfffd6ec7243b in g_strdup (/lib64/libglib-2.0.so.0+0x7243b) + #3 0xfffd6ec73e6f in g_strsplit (/lib64/libglib-2.0.so.0+0x73e6f) + #4 0xaaab66be5077 in main /mnt/sdc/qemu-master/qemu-4.2.0-rc0/vl.c:3517 + #5 0xfffd6e140b9f in __libc_start_main (/lib64/libc.so.6+0x20b9f) + #6 0xaaab66bf0f53 (./build/aarch64-softmmu/qemu-system-aarch64+0x8a0f53) + +Reported-by: Euler Robot +Signed-off-by: Pan Nengyuan +Message-Id: <20200110091710.53424-2-pannengyuan@huawei.com> +Signed-off-by: Paolo Bonzini +--- + vl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/vl.c b/vl.c +index b426b32134..cec0bfdb44 100644 +--- a/vl.c ++++ b/vl.c +@@ -3532,7 +3532,7 @@ int main(int argc, char **argv, char **envp) + gchar **optname = g_strsplit(typename, + ACCEL_CLASS_SUFFIX, 0); + printf("%s\n", optname[0]); +- g_free(optname); ++ g_strfreev(optname); + } + g_free(typename); + } +-- +2.27.0 + diff --git a/vmstate-add-qom-interface-to-get-id.patch b/vmstate-add-qom-interface-to-get-id.patch new file mode 100644 index 0000000000000000000000000000000000000000..53a004405a907109dfb0bbc9354a3b0ef979846f --- /dev/null +++ b/vmstate-add-qom-interface-to-get-id.patch @@ -0,0 +1,210 @@ +From d771fca664e40c7d7ec5dfa2c656a282bff705b7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 28 Aug 2019 16:00:19 +0400 +Subject: [PATCH] vmstate: add qom interface to get id +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add an interface to get the instance id, instead of depending on +Device and qdev_get_dev_path(). + +Signed-off-by: Marc-André Lureau +Reviewed-by: Daniel P. Berrangé +Acked-by: Dr. 
David Alan Gilbert +--- + MAINTAINERS | 2 ++ + hw/core/Makefile.objs | 1 + + hw/core/qdev.c | 14 +++++++++++++ + hw/core/vmstate-if.c | 23 +++++++++++++++++++++ + include/hw/vmstate-if.h | 40 ++++++++++++++++++++++++++++++++++++ + include/migration/register.h | 2 ++ + include/migration/vmstate.h | 2 ++ + tests/Makefile.include | 1 + + 8 files changed, 85 insertions(+) + create mode 100644 hw/core/vmstate-if.c + create mode 100644 include/hw/vmstate-if.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index d6de200453..e2d74d7ec3 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2135,6 +2135,8 @@ Migration + M: Juan Quintela + M: Dr. David Alan Gilbert + S: Maintained ++F: hw/core/vmstate-if.c ++F: include/hw/vmstate-if.h + F: include/migration/ + F: migration/ + F: scripts/vmstate-static-checker.py +diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs +index f8481d959f..54c51583d8 100644 +--- a/hw/core/Makefile.objs ++++ b/hw/core/Makefile.objs +@@ -8,6 +8,7 @@ common-obj-y += irq.o + common-obj-y += hotplug.o + common-obj-$(CONFIG_SOFTMMU) += nmi.o + common-obj-$(CONFIG_SOFTMMU) += vm-change-state-handler.o ++common-obj-y += vmstate-if.o + + common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o + common-obj-$(CONFIG_XILINX_AXI) += stream.o +diff --git a/hw/core/qdev.c b/hw/core/qdev.c +index 4b32f2f46d..13931b1117 100644 +--- a/hw/core/qdev.c ++++ b/hw/core/qdev.c +@@ -1048,9 +1048,18 @@ static void device_unparent(Object *obj) + } + } + ++static char * ++device_vmstate_if_get_id(VMStateIf *obj) ++{ ++ DeviceState *dev = DEVICE(obj); ++ ++ return qdev_get_dev_path(dev); ++} ++ + static void device_class_init(ObjectClass *class, void *data) + { + DeviceClass *dc = DEVICE_CLASS(class); ++ VMStateIfClass *vc = VMSTATE_IF_CLASS(class); + + class->unparent = device_unparent; + +@@ -1062,6 +1071,7 @@ static void device_class_init(ObjectClass *class, void *data) + */ + dc->hotpluggable = true; + dc->user_creatable = true; ++ vc->get_id = device_vmstate_if_get_id; + } + + void 
device_class_set_parent_reset(DeviceClass *dc, +@@ -1119,6 +1129,10 @@ static const TypeInfo device_type_info = { + .class_init = device_class_init, + .abstract = true, + .class_size = sizeof(DeviceClass), ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_VMSTATE_IF }, ++ { } ++ } + }; + + static void qdev_register_types(void) +diff --git a/hw/core/vmstate-if.c b/hw/core/vmstate-if.c +new file mode 100644 +index 0000000000..bf453620fe +--- /dev/null ++++ b/hw/core/vmstate-if.c +@@ -0,0 +1,23 @@ ++/* ++ * VMState interface ++ * ++ * Copyright (c) 2009-2019 Red Hat Inc ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/vmstate-if.h" ++ ++static const TypeInfo vmstate_if_info = { ++ .name = TYPE_VMSTATE_IF, ++ .parent = TYPE_INTERFACE, ++ .class_size = sizeof(VMStateIfClass), ++}; ++ ++static void vmstate_register_types(void) ++{ ++ type_register_static(&vmstate_if_info); ++} ++ ++type_init(vmstate_register_types); +diff --git a/include/hw/vmstate-if.h b/include/hw/vmstate-if.h +new file mode 100644 +index 0000000000..8ff7f0f292 +--- /dev/null ++++ b/include/hw/vmstate-if.h +@@ -0,0 +1,40 @@ ++/* ++ * VMState interface ++ * ++ * Copyright (c) 2009-2019 Red Hat Inc ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++ ++#ifndef VMSTATE_IF_H ++#define VMSTATE_IF_H ++ ++#include "qom/object.h" ++ ++#define TYPE_VMSTATE_IF "vmstate-if" ++ ++#define VMSTATE_IF_CLASS(klass) \ ++ OBJECT_CLASS_CHECK(VMStateIfClass, (klass), TYPE_VMSTATE_IF) ++#define VMSTATE_IF_GET_CLASS(obj) \ ++ OBJECT_GET_CLASS(VMStateIfClass, (obj), TYPE_VMSTATE_IF) ++#define VMSTATE_IF(obj) \ ++ INTERFACE_CHECK(VMStateIf, (obj), TYPE_VMSTATE_IF) ++ ++typedef struct VMStateIf VMStateIf; ++ ++typedef struct VMStateIfClass { ++ InterfaceClass parent_class; ++ ++ char * (*get_id)(VMStateIf *obj); ++} VMStateIfClass; ++ ++static inline char *vmstate_if_get_id(VMStateIf *vmif) ++{ ++ if (!vmif) { ++ return NULL; ++ } ++ ++ return VMSTATE_IF_GET_CLASS(vmif)->get_id(vmif); ++} ++ ++#endif /* VMSTATE_IF_H */ +diff --git a/include/migration/register.h b/include/migration/register.h +index f3ba10b6ef..158130c8c4 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -14,6 +14,8 @@ + #ifndef MIGRATION_REGISTER_H + #define MIGRATION_REGISTER_H + ++#include "hw/vmstate-if.h" ++ + typedef struct SaveVMHandlers { + /* This runs inside the iothread lock. 
*/ + SaveStateHandler *save_state; +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index 8abd2e3b80..8cc1e19fd9 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -27,6 +27,8 @@ + #ifndef QEMU_VMSTATE_H + #define QEMU_VMSTATE_H + ++#include "hw/vmstate-if.h" ++ + typedef struct VMStateInfo VMStateInfo; + typedef struct VMStateDescription VMStateDescription; + typedef struct VMStateField VMStateField; +diff --git a/tests/Makefile.include b/tests/Makefile.include +index 3be60ab999..1c7772a230 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -566,6 +566,7 @@ tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ + hw/core/irq.o \ + hw/core/fw-path-provider.o \ + hw/core/reset.o \ ++ hw/core/vmstate-if.o \ + $(test-qapi-obj-y) + tests/test-vmstate$(EXESUF): tests/test-vmstate.o \ + migration/vmstate.o migration/vmstate-types.o migration/qemu-file.o \ +-- +2.27.0 + diff --git a/vmxcap-correct-the-name-of-the-variables.patch b/vmxcap-correct-the-name-of-the-variables.patch new file mode 100644 index 0000000000000000000000000000000000000000..3a402dfa1e6908d301ff51e2499af5b3443e3014 --- /dev/null +++ b/vmxcap-correct-the-name-of-the-variables.patch @@ -0,0 +1,44 @@ +From de8779d10794312d1eb56dda5936df7ad6e3c87f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 1 Jul 2019 16:51:24 +0200 +Subject: [PATCH] vmxcap: correct the name of the variables + +The low bits are 1 if the control must be one, the high bits +are 1 if the control can be one. Correct the variable names +as they are very confusing. 
+ +Signed-off-by: Paolo Bonzini +--- + scripts/kvm/vmxcap | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap +index 99a8146aaa..2db683215d 100755 +--- a/scripts/kvm/vmxcap ++++ b/scripts/kvm/vmxcap +@@ -51,15 +51,15 @@ class Control(object): + return (val & 0xffffffff, val >> 32) + def show(self): + print(self.name) +- mbz, mb1 = self.read2(self.cap_msr) +- tmbz, tmb1 = 0, 0 ++ mb1, cb1 = self.read2(self.cap_msr) ++ tmb1, tcb1 = 0, 0 + if self.true_cap_msr: +- tmbz, tmb1 = self.read2(self.true_cap_msr) ++ tmb1, tcb1 = self.read2(self.true_cap_msr) + for bit in sorted(self.bits.keys()): +- zero = not (mbz & (1 << bit)) +- one = mb1 & (1 << bit) +- true_zero = not (tmbz & (1 << bit)) +- true_one = tmb1 & (1 << bit) ++ zero = not (mb1 & (1 << bit)) ++ one = cb1 & (1 << bit) ++ true_zero = not (tmb1 & (1 << bit)) ++ true_one = tcb1 & (1 << bit) + s= '?' + if (self.true_cap_msr and true_zero and true_one + and one and not zero): +-- +2.27.0 + diff --git a/vpc-Return-0-from-vpc_co_create-on-success.patch b/vpc-Return-0-from-vpc_co_create-on-success.patch new file mode 100644 index 0000000000000000000000000000000000000000..46fbd90d1bd39f5549ce5d0185d58bbd437a82aa --- /dev/null +++ b/vpc-Return-0-from-vpc_co_create-on-success.patch @@ -0,0 +1,49 @@ +From 97c478c355fee96eb2b740313f50561e69b6f305 Mon Sep 17 00:00:00 2001 +From: Max Reitz +Date: Mon, 2 Sep 2019 21:33:16 +0200 +Subject: [PATCH] vpc: Return 0 from vpc_co_create() on success + +blockdev_create_run() directly uses .bdrv_co_create()'s return value as +the job's return value. Jobs must return 0 on success, not just any +nonnegative value. Therefore, using blockdev-create for VPC images may +currently fail as the vpc driver may return a positive integer. 
+ +Because there is no point in returning a positive integer anywhere in +the block layer (all non-negative integers are generally treated as +complete success), we probably do not want to add more such cases. +Therefore, fix this problem by making the vpc driver always return 0 in +case of success. + +Suggested-by: Kevin Wolf +Cc: qemu-stable@nongnu.org +Signed-off-by: Max Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 1a37e3124407b5a145d44478d3ecbdb89c63789f) +Signed-off-by: Michael Roth +--- + block/vpc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/block/vpc.c b/block/vpc.c +index d4776ee8a5..3a88e28e2b 100644 +--- a/block/vpc.c ++++ b/block/vpc.c +@@ -885,6 +885,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, + goto fail; + } + ++ ret = 0; + fail: + return ret; + } +@@ -908,7 +909,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, + return ret; + } + +- return ret; ++ return 0; + } + + static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts, +-- +2.23.0 diff --git a/vtimer-Drop-vtimer-virtual-timer-adjust.patch b/vtimer-Drop-vtimer-virtual-timer-adjust.patch new file mode 100644 index 0000000000000000000000000000000000000000..726498fb6c778f8d5739e9614ae451d54a11bb56 --- /dev/null +++ b/vtimer-Drop-vtimer-virtual-timer-adjust.patch @@ -0,0 +1,144 @@ +From b1782119bcfac96d8a541d8d60ee00f954d721db Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Wed, 27 May 2020 17:48:54 +0800 +Subject: [PATCH] vtimer: Drop vtimer virtual timer adjust + +This patch drops the vtimer virtual timer adjust, cross version migration +from openEuler qemu-4.0.1 to qemu-4.1.0 is not supported as a consequence. + +By default openEuler qemu-4.1.0 use kvm_adjvtime as the virtual timer. 
+ +Signed-off-by: Ying Fang + +diff --git a/cpus.c b/cpus.c +index 6a28bdef..927a00aa 100644 +--- a/cpus.c ++++ b/cpus.c +@@ -1066,34 +1066,6 @@ void cpu_synchronize_all_pre_loadvm(void) + } + } + +-#ifdef __aarch64__ +-static bool kvm_adjvtime_enabled(CPUState *cs) +-{ +- ARMCPU *cpu = ARM_CPU(cs); +- return cpu->kvm_adjvtime == true; +-} +- +-static void get_vcpu_timer_tick(CPUState *cs) +-{ +- CPUARMState *env = &ARM_CPU(cs)->env; +- int err; +- struct kvm_one_reg reg; +- uint64_t timer_tick; +- +- reg.id = KVM_REG_ARM_TIMER_CNT; +- reg.addr = (uintptr_t) &timer_tick; +- +- err = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg); +- if (err < 0) { +- error_report("get vcpu tick failed, ret = %d", err); +- env->vtimer = 0; +- return; +- } +- env->vtimer = timer_tick; +- return; +-} +-#endif +- + static int do_vm_stop(RunState state, bool send_stop) + { + int ret = 0; +@@ -1101,17 +1073,6 @@ static int do_vm_stop(RunState state, bool send_stop) + if (runstate_is_running()) { + cpu_disable_ticks(); + pause_all_vcpus(); +-#ifdef __aarch64__ +- /* vtimer adjust is used in openEuler qemu-4.0.1, however kvm_adjvtime +- * is introduced in openEuler qemu-4.1.0. To maintain the compatibility +- * and enable cross version migration, let's enable vtimer adjust only +- * if kvm_adjvtime is not enabled, otherwise there may be conflicts +- * between vtimer adjust and kvm_adjvtime.
+- */ +- if (first_cpu && !kvm_adjvtime_enabled(first_cpu)) { +- get_vcpu_timer_tick(first_cpu); +- } +-#endif + runstate_set(state); + vm_state_notify(0, state); + if (send_stop) { +@@ -1957,46 +1918,11 @@ void cpu_resume(CPUState *cpu) + qemu_cpu_kick(cpu); + } + +-#ifdef __aarch64__ +- +-static void set_vcpu_timer_tick(CPUState *cs) +-{ +- CPUARMState *env = &ARM_CPU(cs)->env; +- +- if (env->vtimer == 0) { +- return; +- } +- +- int err; +- struct kvm_one_reg reg; +- uint64_t timer_tick = env->vtimer; +- env->vtimer = 0; +- +- reg.id = KVM_REG_ARM_TIMER_CNT; +- reg.addr = (uintptr_t) &timer_tick; +- +- err = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); +- if (err < 0) { +- error_report("Set vcpu tick failed, ret = %d", err); +- return; +- } +- return; +-} +-#endif +- + void resume_all_vcpus(void) + { + CPUState *cpu; + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); +-#ifdef __aarch64__ +- /* Enable vtimer adjust only if kvm_adjvtime is not enabled, otherwise +- * there may be conflicts between vtimer adjust and kvm_adjvtime.
+- */ +- if (first_cpu && !kvm_adjvtime_enabled(first_cpu)) { +- set_vcpu_timer_tick(first_cpu); +- } +-#endif + CPU_FOREACH(cpu) { + cpu_resume(cpu); + } +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index aec6a214..86eb79cd 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -262,8 +262,6 @@ typedef struct CPUARMState { + uint64_t sp_el[4]; /* AArch64 banked stack pointers */ + + +- uint64_t vtimer; /* Timer tick when vcpu stop */ +- + /* System control coprocessor (cp15) */ + struct { + uint32_t c0_cpuid; +diff --git a/target/arm/machine.c b/target/arm/machine.c +index ec28b839..ee3c59a6 100644 +--- a/target/arm/machine.c ++++ b/target/arm/machine.c +@@ -814,7 +814,6 @@ const VMStateDescription vmstate_arm_cpu = { + VMSTATE_UINT32(env.exception.syndrome, ARMCPU), + VMSTATE_UINT32(env.exception.fsr, ARMCPU), + VMSTATE_UINT64(env.exception.vaddress, ARMCPU), +- VMSTATE_UINT64(env.vtimer, ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), + { +-- +2.23.0 + diff --git a/vtimer-compat-cross-version-migration-from-v4.0.1.patch b/vtimer-compat-cross-version-migration-from-v4.0.1.patch new file mode 100644 index 0000000000000000000000000000000000000000..f452948fd29818c9551899e5044de1e3b33bc235 --- /dev/null +++ b/vtimer-compat-cross-version-migration-from-v4.0.1.patch @@ -0,0 +1,41 @@ +From aec34c33730c36b34e4442548885463f57100e13 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Fri, 8 May 2020 11:25:28 +0800 +Subject: [PATCH] vtimer: compat cross version migration from v4.0.1 + +vtimer feature was added to qemu v4.0.1 to record timer tick when vcpu +is stopped. However this feature is discarded and the new virtual time +adjustment is introduced. + +This patch adds the missing vtimer parameter to CPUARMState in order +to stay compatible with cross version migration from the v4.0.1 openEuler 2003 lts release.
+ +Signed-off-by: Ying Fang + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 219c222b..2609113d 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -261,6 +261,8 @@ typedef struct CPUARMState { + uint64_t elr_el[4]; /* AArch64 exception link regs */ + uint64_t sp_el[4]; /* AArch64 banked stack pointers */ + ++ uint64_t vtimer; /* Timer tick when vcpu is stopped */ ++ + /* System control coprocessor (cp15) */ + struct { + uint32_t c0_cpuid; +diff --git a/target/arm/machine.c b/target/arm/machine.c +index ee3c59a6..ec28b839 100644 +--- a/target/arm/machine.c ++++ b/target/arm/machine.c +@@ -814,6 +814,7 @@ const VMStateDescription vmstate_arm_cpu = { + VMSTATE_UINT32(env.exception.syndrome, ARMCPU), + VMSTATE_UINT32(env.exception.fsr, ARMCPU), + VMSTATE_UINT64(env.exception.vaddress, ARMCPU), ++ VMSTATE_UINT64(env.vtimer, ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_PHYS], ARMCPU), + VMSTATE_TIMER_PTR(gt_timer[GTIMER_VIRT], ARMCPU), + { +-- +2.23.0 diff --git a/x86-Intel-AVX512_BF16-feature-enabling.patch b/x86-Intel-AVX512_BF16-feature-enabling.patch new file mode 100644 index 0000000000000000000000000000000000000000..175190f10c71a4670f32ab3d16a49fee127e1c29 --- /dev/null +++ b/x86-Intel-AVX512_BF16-feature-enabling.patch @@ -0,0 +1,179 @@ +From e2fdc78f93d61be487c03a782aef6fdd8b26fa7e Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Thu, 25 Jul 2019 14:14:16 +0800 +Subject: [PATCH] x86: Intel AVX512_BF16 feature enabling + +Intel CooperLake cpu adds AVX512_BF16 instruction, defining as +CPUID.(EAX=7,ECX=1):EAX[bit 05]. + +The patch adds a property for setting the subleaf of CPUID leaf 7 in +case that people would like to specify it.
+ +The release spec link as follows, +https://software.intel.com/sites/default/files/managed/c5/15/\ +architecture-instruction-set-extensions-programming-reference.pdf + +Signed-off-by: Jing Liu +Signed-off-by: Paolo Bonzini + +Signed-off-by: Jingyi Wang +--- + target/i386/cpu.c | 39 ++++++++++++++++++++++++++++++++++++++- + target/i386/cpu.h | 7 +++++++ + target/i386/kvm.c | 3 ++- + 3 files changed, 47 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 19751e37a7..1ade90c28b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -770,6 +770,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + /* CPUID_7_0_ECX_OSPKE is dynamic */ \ + CPUID_7_0_ECX_LA57) + #define TCG_7_0_EDX_FEATURES 0 ++#define TCG_7_1_EAX_FEATURES 0 + #define TCG_APM_FEATURES 0 + #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT + #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) +@@ -1095,6 +1096,25 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + }, + .tcg_features = TCG_7_0_EDX_FEATURES, + }, ++ [FEAT_7_1_EAX] = { ++ .type = CPUID_FEATURE_WORD, ++ .feat_names = { ++ NULL, NULL, NULL, NULL, ++ NULL, "avx512-bf16", NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ NULL, NULL, NULL, NULL, ++ }, ++ .cpuid = { ++ .eax = 7, ++ .needs_ecx = true, .ecx = 1, ++ .reg = R_EAX, ++ }, ++ .tcg_features = TCG_7_1_EAX_FEATURES, ++ }, + [FEAT_8000_0007_EDX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { +@@ -4292,13 +4312,19 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + case 7: + /* Structured Extended Feature Flags Enumeration Leaf */ + if (count == 0) { +- *eax = 0; /* Maximum ECX value for sub-leaves */ ++ /* Maximum ECX value for sub-leaves */ ++ *eax = env->cpuid_level_func7; + *ebx = env->features[FEAT_7_0_EBX]; /* Feature flags */ + *ecx = env->features[FEAT_7_0_ECX]; /* 
Feature flags */ + if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) { + *ecx |= CPUID_7_0_ECX_OSPKE; + } + *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ ++ } else if (count == 1) { ++ *eax = env->features[FEAT_7_1_EAX]; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; + } else { + *eax = 0; + *ebx = 0; +@@ -4948,6 +4974,11 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w) + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel2, eax); + break; + } ++ ++ if (eax == 7) { ++ x86_cpu_adjust_level(cpu, &env->cpuid_min_level_func7, ++ fi->cpuid.ecx); ++ } + } + + /* Calculate XSAVE components based on the configured CPU feature flags */ +@@ -5066,6 +5097,7 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + x86_cpu_adjust_feat_level(cpu, FEAT_1_ECX); + x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); ++ x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); + x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_ECX); + x86_cpu_adjust_feat_level(cpu, FEAT_8000_0007_EDX); +@@ -5097,6 +5129,9 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) + } + + /* Set cpuid_*level* based on cpuid_min_*level, if not explicitly set */ ++ if (env->cpuid_level_func7 == UINT32_MAX) { ++ env->cpuid_level_func7 = env->cpuid_min_level_func7; ++ } + if (env->cpuid_level == UINT32_MAX) { + env->cpuid_level = env->cpuid_min_level; + } +@@ -5868,6 +5903,8 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), + DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), ++ DEFINE_PROP_UINT32("level-func7", X86CPU, env.cpuid_level_func7, ++ UINT32_MAX), + DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), + DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, UINT32_MAX), + DEFINE_PROP_UINT32("xlevel2", 
X86CPU, env.cpuid_xlevel2, UINT32_MAX), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 8b3dc5533e..488b4dc778 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -479,6 +479,7 @@ typedef enum FeatureWord { + FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */ + FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */ + FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */ ++ FEAT_7_1_EAX, /* CPUID[EAX=7,ECX=1].EAX */ + FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ + FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ + FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ +@@ -692,6 +693,8 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_CORE_CAPABILITY (1U << 30) /*Core Capability*/ + #define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */ + ++#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) /* AVX512 BFloat16 Instruction */ ++ + #define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) /* Write back and + do not invalidate cache */ + #define CPUID_8000_0008_EBX_IBPB (1U << 12) /* Indirect Branch Prediction Barrier */ +@@ -1322,6 +1325,10 @@ typedef struct CPUX86State { + /* Fields after this point are preserved across CPU reset. */ + + /* processor features (e.g. 
for CPUID insn) */ ++ /* Minimum cpuid leaf 7 value */ ++ uint32_t cpuid_level_func7; ++ /* Actual cpuid leaf 7 value */ ++ uint32_t cpuid_min_level_func7; + /* Minimum level/xlevel/xlevel2, based on CPU model + features */ + uint32_t cpuid_min_level, cpuid_min_xlevel, cpuid_min_xlevel2; + /* Maximum level/xlevel/xlevel2 value for auto-assignment: */ +diff --git a/target/i386/kvm.c b/target/i386/kvm.c +index dbbb13772a..f55d4b4b97 100644 +--- a/target/i386/kvm.c ++++ b/target/i386/kvm.c +@@ -1497,6 +1497,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + c = &cpuid_data.entries[cpuid_i++]; + } + break; ++ case 0x7: + case 0x14: { + uint32_t times; + +@@ -1509,7 +1510,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + for (j = 1; j <= times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x14,ecx:0x%x)\n", j); ++ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; +-- +2.27.0 + diff --git a/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch b/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc17f48b7a395bafffaf7ef9763d04bff110af0a --- /dev/null +++ b/x86-do-not-advertise-die-id-in-query-hotpluggbale-cp.patch @@ -0,0 +1,60 @@ +From 725dfa851f8e1de8653f41a4bd38c7f98757eb40 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 2 Sep 2019 08:02:22 -0400 +Subject: [PATCH] x86: do not advertise die-id in query-hotpluggbale-cpus if + '-smp dies' is not set + +Commit 176d2cda0 (i386/cpu: Consolidate die-id validity in smp context) added +new 'die-id' topology property to CPUs and exposed it via QMP command +query-hotpluggable-cpus, which broke -device/device_add cpu-foo for existing +users that do not support die-id/dies yet. 
That's would be fine if it happened +to new machine type only but it also happened to old machine types, +which breaks migration from old QEMU to the new one, for example following CLI: + + OLD-QEMU -M pc-i440fx-4.0 -smp 1,max_cpus=2 \ + -device qemu64-x86_64-cpu,socket-id=1,core-id=0,thread-id +is not able to start with new QEMU, complaining about invalid die-id. + +After discovering regression, the patch + "pc: Don't make die-id mandatory unless necessary" +makes die-id optional so old CLI would work. + +However it's not enough as new QEMU still exposes die-id via query-hotpluggbale-cpus +QMP command, so the users that started old machine type on new QEMU, using all +properties (including die-id) received from QMP command (as required), won't be +able to start old QEMU using the same properties since it doesn't support die-id. + +Fix it by hiding die-id in query-hotpluggbale-cpus for all machine types in case +'-smp dies' is not provided on CLI or -smp dies = 1', in which case smp_dies == 1 +and APIC ID is calculated in default way (as it was before DIE support) so we won't +need compat code as in both cases the topology provided to guest via CPUID is the same. 
+ +Signed-off-by: Igor Mammedov +Message-Id: <20190902120222.6179-1-imammedo@redhat.com> +Reviewed-by: Eduardo Habkost +Signed-off-by: Eduardo Habkost +(cherry picked from commit c6c1bb89fb46f3b88f832e654cf5a6f7941aac51) +Signed-off-by: Michael Roth +--- + hw/i386/pc.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 947f81070f..d011733ff7 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2887,8 +2887,10 @@ static const CPUArchIdList *pc_possible_cpu_arch_ids(MachineState *ms) + ms->smp.threads, &topo); + ms->possible_cpus->cpus[i].props.has_socket_id = true; + ms->possible_cpus->cpus[i].props.socket_id = topo.pkg_id; +- ms->possible_cpus->cpus[i].props.has_die_id = true; +- ms->possible_cpus->cpus[i].props.die_id = topo.die_id; ++ if (pcms->smp_dies > 1) { ++ ms->possible_cpus->cpus[i].props.has_die_id = true; ++ ms->possible_cpus->cpus[i].props.die_id = topo.die_id; ++ } + ms->possible_cpus->cpus[i].props.has_core_id = true; + ms->possible_cpus->cpus[i].props.core_id = topo.core_id; + ms->possible_cpus->cpus[i].props.has_thread_id = true; +-- +2.23.0 diff --git a/xhci-fix-valid.max_access_size-to-access-address-reg.patch b/xhci-fix-valid.max_access_size-to-access-address-reg.patch new file mode 100644 index 0000000000000000000000000000000000000000..466cbf2667efaf26cc65c160c8223659abb0c288 --- /dev/null +++ b/xhci-fix-valid.max_access_size-to-access-address-reg.patch @@ -0,0 +1,62 @@ +From a71d1847aa780b3c4062e582ab400a7fea0413b3 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Tue, 21 Jul 2020 10:33:22 +0200 +Subject: [PATCH 01/11] xhci: fix valid.max_access_size to access address + registers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +QEMU XHCI advertises AC64 (64-bit addressing) but doesn't allow +64-bit mode access in "runtime" and "operational" MemoryRegionOps. + +Set the max_access_size based on sizeof(dma_addr_t) as AC64 is set. 
+ +XHCI specs: +"If the xHC supports 64-bit addressing (AC64 = ‘1’), then software +should write 64-bit registers using only Qword accesses. If a +system is incapable of issuing Qword accesses, then writes to the +64-bit address fields shall be performed using 2 Dword accesses; +low Dword-first, high-Dword second. If the xHC supports 32-bit +addressing (AC64 = ‘0’), then the high Dword of registers containing +64-bit address fields are unused and software should write addresses +using only Dword accesses" + +The problem has been detected with SLOF, as linux kernel always accesses +registers using 32-bit access even if AC64 is set and revealed by +5d971f9e6725 ("memory: Revert "memory: accept mismatching sizes in memory_region_access_valid"") + +Suggested-by: Alexey Kardashevskiy +Signed-off-by: Laurent Vivier +Message-id: 20200721083322.90651-1-lvivier@redhat.com +Signed-off-by: Gerd Hoffmann +Signed-off-by: BiaoXiang Ye +--- + hw/usb/hcd-xhci.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index a21485fe..24565de1 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3171,7 +3171,7 @@ static const MemoryRegionOps xhci_oper_ops = { + .read = xhci_oper_read, + .write = xhci_oper_write, + .valid.min_access_size = 4, +- .valid.max_access_size = 4, ++ .valid.max_access_size = sizeof(dma_addr_t), + .endianness = DEVICE_LITTLE_ENDIAN, + }; + +@@ -3187,7 +3187,7 @@ static const MemoryRegionOps xhci_runtime_ops = { + .read = xhci_runtime_read, + .write = xhci_runtime_write, + .valid.min_access_size = 4, +- .valid.max_access_size = 4, ++ .valid.max_access_size = sizeof(dma_addr_t), + .endianness = DEVICE_LITTLE_ENDIAN, + }; + +-- +2.27.0.dirty + diff --git a/xhci-recheck-slot-status.patch b/xhci-recheck-slot-status.patch new file mode 100644 index 0000000000000000000000000000000000000000..d05c3c8344802c788827334b2f48693ec4b72edb --- /dev/null +++ b/xhci-recheck-slot-status.patch @@ -0,0 +1,64 @@ 
+From 33d6a2bc0e432a85962b71bcb2c3b5eec39bf436 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Tue, 7 Jan 2020 09:36:06 +0100 +Subject: [PATCH] xhci: recheck slot status +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Factor out slot status check into a helper function. Add an additional +check after completing transfers. This is needed in case a guest +queues multiple transfers in a row and a device unplug happens while +qemu processes them. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1786413 +Signed-off-by: Gerd Hoffmann +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20200107083606.12393-1-kraxel@redhat.com +--- + hw/usb/hcd-xhci.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index 24565de1d1..4b42f53b9c 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -1860,6 +1860,13 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, + xhci_kick_epctx(epctx, streamid); + } + ++static bool xhci_slot_ok(XHCIState *xhci, int slotid) ++{ ++ return (xhci->slots[slotid - 1].uport && ++ xhci->slots[slotid - 1].uport->dev && ++ xhci->slots[slotid - 1].uport->dev->attached); ++} ++ + static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + { + XHCIState *xhci = epctx->xhci; +@@ -1877,9 +1884,7 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + + /* If the device has been detached, but the guest has not noticed this + yet the 2 above checks will succeed, but we must NOT continue */ +- if (!xhci->slots[epctx->slotid - 1].uport || +- !xhci->slots[epctx->slotid - 1].uport->dev || +- !xhci->slots[epctx->slotid - 1].uport->dev->attached) { ++ if (!xhci_slot_ok(xhci, epctx->slotid)) { + return; + } + +@@ -1986,6 +1991,10 @@ static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) + } else { + xhci_fire_transfer(xhci, xfer, epctx); + } ++ if 
(!xhci_slot_ok(xhci, epctx->slotid)) { ++ /* surprise removal -> stop processing */ ++ break; ++ } + if (xfer->complete) { + /* update ring dequeue ptr */ + xhci_set_ep_state(xhci, epctx, stctx, epctx->state); +-- +2.27.0 + diff --git a/xics-Don-t-deassert-outputs.patch b/xics-Don-t-deassert-outputs.patch new file mode 100644 index 0000000000000000000000000000000000000000..083a9a2e885cd2b4c2d8fe701c2ad037b5bece00 --- /dev/null +++ b/xics-Don-t-deassert-outputs.patch @@ -0,0 +1,32 @@ +From 5b137b37ef7c4941200798cca99200e80ef17a01 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Wed, 4 Dec 2019 20:43:43 +0100 +Subject: [PATCH] xics: Don't deassert outputs + +The correct way to do this is to deassert the input pins on the CPU side. +This is the case since a previous change. + +Signed-off-by: Greg Kurz +Message-Id: <157548862298.3650476.1228720391270249433.stgit@bahia.lan> +Signed-off-by: David Gibson +--- + hw/intc/xics.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/intc/xics.c b/hw/intc/xics.c +index faa976e2f8..d2d377fc85 100644 +--- a/hw/intc/xics.c ++++ b/hw/intc/xics.c +@@ -303,9 +303,6 @@ static void icp_reset_handler(void *dev) + icp->pending_priority = 0xff; + icp->mfrr = 0xff; + +- /* Make all outputs are deasserted */ +- qemu_set_irq(icp->output, 0); +- + if (kvm_irqchip_in_kernel()) { + Error *local_err = NULL; + +-- +2.27.0 +